Module `vipy.image`

Expand source code Browse git

import os
import PIL.Image
import PIL.ImageFilter
import PIL.ExifTags
import vipy.show
import vipy.globals
from vipy.globals import log, cache
from vipy.util import isnumpy, isurl, isimageurl, to_iterable, tolist,\
    fileext, tempimage, mat2gray, imwrite, imwritegray, mergedict, \
    tempjpg, filetail, isimagefile, remkdir, hasextension, truncate_string, \
    try_import, tolist, islistoflists, istupleoftuples, isstring, \
    islist, isnumber, isnumpyarray, string_to_pil_interpolation, toextension, \
    shortuuid, iswebp, has_image_extension, tocache, stringhash
from vipy.geometry import BoundingBox, imagebox
import vipy.object
from vipy.object import greedy_assignment
import vipy.downloader
import urllib.request
import urllib.error
import urllib.parse
import http.client as httplib
import copy
from copy import deepcopy
import numpy as np
import shutil
import io
import matplotlib.pyplot as plt
import base64
import types
import hashlib
import time
import math
from itertools import zip_longest
import functools


try:
    import ujson as json  # faster
except ImportError:        
    import json  # fastish
    

class Image():
    """vipy.image.Image class
    
    The vipy image class provides a fluent, lazy interface for representing, transforming and visualizing images.
    The following constructors are supported:

    ```python
    im = vipy.image.Image(filename="/path/to/image.ext")
    ```
    
    All image file formats that are readable by PIL are supported here.

    ```python
    im = vipy.image.Image(url="http://domain.com/path/to/image.ext")
    ```
    
    The image will be downloaded from the provided url and saved to a temporary filename.
    The environment variable VIPY_CACHE controls the location of the directory used for saving images, otherwise this will be saved to the system temp directory.

    ```python
    im = vipy.image.Image(url="http://domain.com/path/to/image.ext", filename="/path/to/new/image.ext")
    ```

    The image will be downloaded from the provided url and saved to the provided filename.
    The url() method provides optional basic authentication set for username and password

    ```python
    im = vipy.image.Image(array=img, colorspace='rgb')
    ```

    The image will be constructed from a provided numpy array 'img', with an associated colorspace.  The numpy array and colorspace can be one of the following combinations:

    - 'rgb': uint8, three channel (red, green, blue)
    - 'rgba':  uint8, four channel (rgb + alpha)
    - 'bgr': uint8, three channel (blue, green, red), such as is returned from cv2.imread()
    - 'bgra':  uint8, four channel
    - 'hsv':  uint8, three channel (hue, saturation, value)
    - 'lum':  uint8, one channel, luminance (8 bit grey level)
    - 'grey':  float32, one channel in range [0,1] (32 bit intensity)
    - 'float':  float32, any channel in range [-inf, +inf]
    
    The most general colorspace is 'float' which is used to manipulate images prior to network encoding, such as applying bias. 
    
    Args:
        filename: a path to an image file that is readable by PIL
        url:  a url string to an image file that is readable by PIL
        array: a numpy array of type uint8 or float32 of shape HxWxC=height x width x channels
        colorspace:  a string in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']
        attributes:  a python dictionary that is passed by reference to the image.  This is useful for encoding metadata about the image.  Accessible as im.attributes

    Returns:
        A `vipy.image.Image` object

    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')
    def __init__(self, filename=None, url=None, array=None, colorspace=None, attributes=None):
        """Initialize the image from an optional filename, url, pixel array, colorspace and attributes dictionary."""
        # Private state: custom loader (set with loader()), pixel buffer and colorspace tag
        self._loader = None
        self._array = None
        self._colorspace = None

        # Backing file and url
        self._filename = filename
        if url is not None:
            # Inline scheme check, faster than vipy.util.isurl()
            assert isinstance(url, str) and url.startswith(('http://', 'https://', 'scp://', 's3://'))
        self._url = url

        # Pixel buffer, handed to array() without requesting a copy
        if array is not None:
            assert isnumpy(array), 'Invalid Array - Type "%s" must be np.array()' % (str(type(array)))
        self.array(array)

        # Guess the colorspace from the pixel buffer when the caller did not provide one
        if not colorspace and self.isloaded():
            pixels = self._array
            if pixels.dtype == np.uint8:
                if pixels.ndim == 3 and pixels.shape[2] == 3:
                    colorspace = 'rgb'   # three channel uint8
                elif pixels.ndim == 2 or (pixels.ndim == 3 and pixels.shape[2] == 1):
                    colorspace = 'lum'   # one channel uint8
            elif pixels.dtype == np.float32:
                colorspace = 'float'     # float32, any number of channels
        self.colorspace(colorspace)

        # Public attributes: the provided dictionary is stored by reference
        if attributes is None:
            self.attributes = {}
        else:
            assert isinstance(attributes, dict), "Attributes must be dictionary"
            self.attributes = attributes

    @classmethod
    def cast(cls, im):
        """Typecast the conformal vipy.image object im as `vipy.image.Image`.

        This is useful for downcasting `vipy.image.Scene` or `vipy.image.ImageDetection` down to an image.

        ```python
        ims = vipy.image.RandomScene()
        im = vipy.image.Image.cast(ims)
        ```

        .. note:: The private filename, url, array and colorspace fields and the attributes dictionary of `im` are passed as-is to the constructor.

        """
        assert isinstance(im, vipy.image.Image), "Invalid input - must derive from vipy.image.Image"
        fields = {'filename': im._filename,
                  'url': im._url,
                  'array': im._array,
                  'colorspace': im._colorspace,
                  'attributes': im.attributes}
        return cls(**fields)


    @classmethod
    def from_dict(cls, d):
        """Construct an image from a dictionary, such as the one returned by `vipy.image.Image.dict` or a decoded JSON object.

        Args:
            d: [dict] keys may be 'filename', 'url', 'array', 'colorspace' and 'attributes', each optionally prefixed with underscores.  Missing keys default to None.

        Returns:
            A new object of class `cls`.

        .. note:: The 'array' entry is converted to float32 when the colorspace is 'float' or 'grey' (the float32 colorspaces listed in the class documentation), otherwise to uint8.
        """
        d = {k.lstrip('_'): v for (k, v) in d.items()}  # prettyjson (remove "_" prefix to attributes)
        colorspace = d.get('colorspace')
        pixels = d.get('array')
        if pixels is not None:
            # Coercing a float image to uint8 would silently truncate its pixel values
            dtype = np.float32 if colorspace in ('float', 'grey') else np.uint8
            pixels = np.array(pixels, dtype=dtype)
        return cls(filename=d.get('filename'),
                   url=d.get('url'),
                   array=pixels,
                   colorspace=colorspace,
                   attributes=d.get('attributes'))
        
    @classmethod
    def from_uri(cls, uri):
        """Create an image object from an absolute file path or url.

        The uri is passed as `url` when `vipy.util.isurl` accepts it and as `filename` when `vipy.util.isfile` accepts it.
        """
        is_url = vipy.util.isurl(uri)
        is_file = vipy.util.isfile(uri)
        assert is_url or is_file, "invalid path"
        return cls(url=uri if is_url else None, filename=uri if is_file else None)
    
    @classmethod
    def from_json(cls, s):
        """Import the JSON string s as an `vipy.image.Image` object.

        Args:
            s: json encoded string.  A dict is also accepted and is forwarded unchanged to `vipy.image.Image.from_dict`.

        This will perform a round trip such that im1 == im2

        ```python
        im1 = vipy.image.RandomImage()
        im2 = vipy.image.Image.from_json(im1.json())
        assert im1 == im2
        ```

        Note: to construct from non-encoded json (e.g. a dict prior to dumps), use from_dict

        """
        payload = s if isinstance(s, dict) else json.loads(s)
        return cls.from_dict(payload)
    
    def __eq__(self, other):
        """Images are equivalent if they have the same filename, url and array.

        Returns:
            A python bool.  Arrays are compared with np.array_equal, so arrays of different shape compare unequal
            instead of being broadcast against each other (or raising).  Two images that both have no array compare
            equal on the array criterion.
        """
        if not isinstance(other, Image):
            return False
        if not (other.filename() == self.filename() and other.url() == self.url()):
            return False
        (mine, theirs) = (self.array(), other.array())
        if mine is None or theirs is None:
            return mine is None and theirs is None
        return bool(np.array_equal(theirs, mine))

    def __str__(self):
        """The string form of an image is identical to its repr"""
        return repr(self)

    def __iter__(self):
        """Yield single image for consistency with videos"""
        yield self

    def __len__(self):
        """Images have length 1 always, matching the single element yielded by iterating over the image"""
        return 1
    
    def __array__(self, dtype=None, copy=None):
        """Called on np.array(self) for custom array container, (requires numpy >=1.16).

        Args:
            dtype: [numpy dtype] optional dtype requested by numpy, e.g. from np.array(im, dtype=np.float32)
            copy: [bool] optional copy flag passed by numpy >= 2.0.  True forces a copy, False forbids one, None copies only if needed.

        Returns:
            The result of `self.numpy()`, converted to dtype if requested.

        Raises:
            ValueError: if copy=False but a dtype conversion (which requires a copy) is needed.
        """
        arr = self.numpy()  # assumed to return a numpy array - TODO confirm against numpy() definition
        if dtype is not None and np.dtype(dtype) != arr.dtype:
            if copy is False:
                raise ValueError('Unable to avoid copy while creating an array as requested')
            return arr.astype(dtype)  # astype always returns a new array
        return arr.copy() if copy else arr
    
    def __repr__(self):
        """Return a one line summary of the image: shape and colorspace if loaded, plus filename and url when present"""
        fields = []
        if self.isloaded():
            dims = self._array.shape
            fields.append("height=%d, width=%d, color=%s" % (dims[0], dims[1], self.colorspace()))
        elif self.has_loader():
            fields.append('loaded=False')

        # The channel count is only reported for the general 'float' colorspace
        if self.colorspace() == 'float':
            fields.append('channels=%d' % self.channels())

        if self.filename() is not None:
            fields.append('filename=%s' % self.filename())
        if self.hasurl():
            fields.append('url=%s' % self.url())

        return '<vipy.image.Image: %s>' % ', '.join(fields)

    def sanitize(self):
        """Remove all private keys from the attributes dictionary.
        
        The attributes dictionary is useful storage for arbitrary (key,value) pairs.  However, this storage may contain sensitive information that should be scrubbed from the media before serialization.  As a general rule, any key that is of the form '__keyname' prepended by two underscores is a private key.  This is analogous to private or reserved attributes in the python lanugage.  Users should reserve these keynames for those keys that should be sanitized and removed before any serialization of this object.
        
        ```python
        assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False
        ```

        """
        self.attributes = {k:v for (k,v) in self.attributes.items() if not k.startswith('__')} if isinstance(self.attributes, dict) else self.attributes
        return self
    
    def print(self, prefix='', sleep=None):
        """Print the representation of the image and return self with an optional sleep=n seconds

        Useful for debugging or sequential visualization in long fluent chains.

        Args:
            prefix: [str] a string printed immediately before the representation
            sleep: [number] if not None, a non-negative number of seconds to sleep after printing (zero is allowed)

        Returns:
            This object
        """
        print(prefix + self.__repr__())
        if sleep is not None:
            # Zero is a valid non-negative duration, as stated by the assertion message
            assert sleep >= 0, "Sleep must be a non-negative number of seconds"
            time.sleep(sleep)
        return self

    def exif(self, extended=False):
        """Return the EXIF meta-data in filename as a dictionary.  Included non-base EXIF data if extended=True.  Returns empty dictionary if no EXIF exists.  Triggers download but not load.

        Args:
            extended: [bool] if true, also merge the tags of every sub-IFD enumerated by PIL.ExifTags.IFD.  GPS tags are named using PIL.ExifTags.GPSTAGS, all others using PIL.ExifTags.TAGS.  Tags with no known name are keyed by their numeric id.

        Returns:
            A dictionary mapping tag name to value
        """
        d = {}
        if not self.download().hasfilename():
            return d

        # Keep the file open while reading IFDs, and close it when done
        with PIL.Image.open(self.filename()) as img:
            exif = img.getexif()
            if exif is None:
                return d
            d = {PIL.ExifTags.TAGS[k]: v for (k, v) in exif.items() if k in PIL.ExifTags.TAGS}

            if extended:
                for ifd_id in PIL.ExifTags.IFD:
                    try:
                        ifd = exif.get_ifd(ifd_id)
                    except KeyError:
                        continue  # this IFD is not present in the file
                    resolve = PIL.ExifTags.GPSTAGS if ifd_id == PIL.ExifTags.IFD.GPSInfo else PIL.ExifTags.TAGS
                    # Applies to every IFD, including GPSInfo
                    for (k, v) in ifd.items():
                        d[resolve.get(k, k)] = v
        return d
    
    def tile(self, tilewidth, tileheight, overlaprows=0, overlapcols=0):
        """Generate an image tiling.

        A tiling is a decomposition of an image into overlapping or non-overlapping rectangular regions.

        Args:
            tilewidth: [int] the image width of each tile
            tileheight: [int] the image height of each tile
            overlaprows: [int] the number of overlapping rows (height) for each tile
            overlapcols: [int] the number of overlapping columns (width) for each tile

        Returns:
            A list of `vipy.image.Image` objects such that each image is a single tile and the set of these tiles forms the original image
            Each image in the returned list contains the 'tile' attribute which encodes the crop used to create the tile, and the shape of the source image.

        .. note::
            - `vipy.image.Image.tile` can be undone using `vipy.image.Image.untile`
            - The identity tiling is im.tile(im.width(), im.height(), overlaprows=0, overlapcols=0)
            - The crop box of a ragged tile at the right or bottom edge is clipped to the image boundary
            - The tile stride is (tilewidth-overlapcols, tileheight-overlaprows).  The overlap must be smaller than the tile size: a zero stride raises ValueError from range(), a negative stride yields an empty tiling.
            - Tiles are ordered column-major: all rows for the first column offset, then the next column offset
        """
        assert tilewidth > 0 and tileheight > 0 and overlaprows >= 0 and overlapcols >= 0, "Invalid input"
        (imwidth, imheight) = (self.width(), self.height())
        assert imwidth >= tilewidth-overlapcols and imheight >= tileheight-overlaprows, "Invalid input"

        # Crop boxes, stepping by the tile size minus the overlap
        (colstride, rowstride) = (tilewidth-overlapcols, tileheight-overlaprows)
        crops = []
        for xmin in range(0, imwidth-overlapcols, colstride):
            for ymin in range(0, imheight-overlaprows, rowstride):
                crops.append(BoundingBox(xmin=xmin, ymin=ymin, width=min(tilewidth, imwidth-xmin), height=min(tileheight, imheight-ymin)))

        # One cropped clone per box, tagged with the crop and the source shape so that untile() can invert it
        tiles = []
        for crop in crops:
            tagged = self.clone(shallow=True, attributes=True).setattribute('tile', {'crop':crop, 'shape':self.shape()})
            tiles.append(tagged.crop(crop))
        return tiles

    def union(self, other):
        """No-op for `vipy.image.Image`: `other` is ignored and this object is returned unchanged"""
        return self
    
    @classmethod
    def untile(cls, imlist):
        """Undo an image tiling and recreate the original image.

        ```python
        tiles = im.tile(im.width()//2, im.height()//2, 0, 0)
        imdst = vipy.image.Image.untile(tiles)
        assert imdst == im
        ```

        Args:
            imlist: this must be the output of `vipy.image.Image.tile`

        Returns:
            A new `vipy.image.Image` object reconstructed from the tiling, such that this is equivalent to the input to `vipy.image.Image.tile`.  Returns None if imlist is empty.

        .. note::
            - All annotations are updated properly for each tile, when the source image is `vipy.image.Scene`
            - The reconstruction canvas uses the dtype of the first tile, so that float tiles are not truncated to uint8
            - The tiles in imlist are modified: each tile array is re-set through `array()` and objects are shifted through `objectmap()` when available
        """
        assert all([isinstance(im, vipy.image.Image) and im.hasattribute('tile') for im in imlist]), "invalid image tile list"
        imc = None
        for im in imlist:
            tile = im.attributes['tile']
            if imc is None:
                # Zero canvas with the shape of the source image recorded by tile()
                nchannels = im.channels()  # triggers a load of the tile if needed
                canvas = np.zeros((tile['shape'][0], tile['shape'][1], nchannels), dtype=im.array().dtype)
                imc = im.clone(shallow=True).array(canvas)
            imc = imc.splat(im.array(tile['crop'].clone().to_origin().int().crop(im.array())), tile['crop'])
            if hasattr(im, 'objectmap'):
                im.objectmap(lambda o: o.set_origin(im.attributes['tile']['crop']))  # FIXME: only for Scene()
            imc = imc.union(im)
        return imc
    
    def uncrop(self, bb, shape):
        """Uncrop using provided bounding box and zeropad to shape=(Height, Width).

        An uncrop is the inverse operation for a crop, which preserves the cropped portion of the image in the correct location and replaces the rest with zeros out to shape.

        ```python
        im = vipy.image.RandomImage(128, 128)
        bb = vipy.geometry.BoundingBox(xmin=0, ymin=0, width=64, height=64)
        uncrop = im.crop(bb).uncrop(bb, shape=(128,128))
        ```

        Args:
            bb: [`vipy.geometry.BoundingBox`] the bounding box used to crop the image in self
            shape: [tuple] (height, width) of the uncropped image

        Returns:
            this `vipy.image.Image` object with the pixels uncropped.

        .. note:: NOT idempotent.  This will generate different results if run more than once.
        """
        (xmin, ymin, boxwidth, boxheight) = bb.xywh()
        (H, W) = shape

        # (before, after) zero padding for rows and columns
        rowpad = (int(ymin), int(H-(ymin+boxheight)))
        colpad = (int(xmin), int(W-(xmin+boxwidth)))

        pixels = self.load().array()
        # A channel axis, when present, is never padded
        padding = (rowpad, colpad, (0, 0)) if pixels.ndim == 3 else (rowpad, colpad)
        self._array = np.pad(pixels, padding, mode='constant')
        return self

    def splat(self, im, bb):
        """Replace pixels within boundingbox in self with pixels in im.

        If im has the same width and height as bb, then all of im is written into the box.  Otherwise the pixels of im at the same box coordinates are written.

        Returns:
            This object, with the pixel buffer modified in place
        """
        assert isinstance(im, vipy.image.Image), "invalid image"
        same_size = (im.width() == bb.width() and im.height() == bb.height())
        # NOTE(review): the interior checks only apply when the sizes differ ('and' binds tighter than 'or') - confirm this is intended
        assert same_size or (bb.isinterior(im.width(), im.height()) and bb.isinterior(self.width(), self.height())), "Invalid bounding box '%s'" % str(bb)

        (x, y, w, h) = bb.xywh()
        rows = slice(int(y), int(y+h))
        cols = slice(int(x), int(x+w))
        source = im.array() if same_size else im.array()[rows, cols]
        self._array[rows, cols] = source
        return self
        
    def store(self):
        """Store the current image file as an attribute of this object.  Useful for archiving an object to be fully self contained without any external references.

           -Remove this stored image using unstore()
           -Unpack this stored image and set up the filename using restore()
           -This method is more efficient than load() followed by pkl(), as it stores the encoded image as a byte string.
           -Useful for creating a single self contained object for distributed processing.

        ```python
        v == v.store().restore(v.filename())
        ```

        Returns:
            This object, with the raw bytes of the image file stored in the '__image__' attribute

        """
        assert self.hasfilename(), "Image file not found"
        with open(self.filename(), 'rb') as src:
            encoded = src.read()
        self.attributes['__image__'] = encoded
        return self

    def unstore(self):
        """Delete the currently stored image from store(), by deleting the '__image__' attribute.  Returns the result of delattribute()."""
        return self.delattribute('__image__')

    def restore(self, filename):
        """Save the currently stored image to filename, and set up filename.

        Args:
            filename: [str] the path to write the bytes previously captured by store()

        Returns:
            The result of setting the filename of this object to the provided filename
        """
        assert self.hasattribute('__image__'), "Image not stored"
        with open(filename, 'wb') as dst:
            encoded = self.attributes['__image__']
            dst.write(encoded)
        return self.filename(filename)
    
    def abspath(self):
        """Change the path of the filename from a relative path to an absolute path (not relocatable).

        The filename is user-expanded ('~'), made absolute and then normalized.
        """
        expanded = os.path.expanduser(self.filename())
        absolute = os.path.abspath(expanded)
        return self.filename(os.path.normpath(absolute))

    def relpath(self, parent=None):
        """Replace the filename with a relative path to parent (or current working directory if none).

        Args:
            parent: [str] the directory that the filename will be made relative to.  Defaults to os.getcwd().

        Returns:
            The result of setting the filename of this object to the relative path, as a string

        Raises:
            ValueError: raised by PurePath.relative_to if parent is not a leading path of the filename
        """
        from pathlib import PurePath  # local import: PurePath is not among the module-level imports

        parent = parent if parent is not None else os.getcwd()
        path = os.path.expanduser(self.filename())
        assert parent in path, "Parent path '%s' not found in abspath '%s'" % (parent, self.filename())
        # Store a plain string, consistent with every other filename setter call in this class
        return self.filename(str(PurePath(path).relative_to(parent)))

    def canload(self):
        """Return True if the image can be loaded successfully, useful for filtering bad links or corrupt images"""
        if not self.isloaded():
            try:
                if isimagefile(self._filename) and os.path.exists(self._filename):
                    PIL.Image.open(self._filename).verify()  # faster, throws exception on corrupted image
                else:
                    self.load().flush()  # fallback, load it and flush to avoid memory leak (expensive)
                return True
            except:
                return False
        else:
            return True
        
    def dict(self):
        """Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding"""
        return {k.lstrip('_'):getattr(self, k) for k in Image.__slots__}  # prettyjson (remove "_" prefix to attributes)                                    

    def json(self, encode=True):
        """Serialize this image to JSON.

        Args:
            encode: [bool] if true, return a JSON encoded string, otherwise return the dictionary prior to encoding

        Returns:
            The entries of `vipy.image.Image.dict` that are not None.  A pixel array, if present, is converted to nested lists.

        Raises:
            ValueError: if the attributes dictionary is not json serializable
        """
        if not vipy.util.is_jsonable(self.attributes):
            raise ValueError('attributes dictionary contains non-json elements and cannot be serialized.  Try self.clear_attributes() or self.sanitize()')

        # Drop empty fields
        d = {}
        for (k, v) in self.dict().items():
            if v is not None:
                d[k] = v

        if d.get('array') is not None:
            if self.hasfilename() or self.hasurl():
                log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() first, then reload the image from backing filename/url after json import')
            d['array'] = self._array.tolist()

        if not encode:
            return d
        return json.dumps(d)
        
    def loader(self, f, x=None):
        """Lambda function to load an unsupported image filename to a numpy array.
        
        This lambda function will be executed during load and the result will be stored in self._array
        """
        self._loader = (f, x if x is not None else self.filename()) if f is not None else None
        return self

    @staticmethod
    def bytes_array_loader(x):
        """Load from a bytes array: decode the encoded image bytes x with PIL and return the pixels as a numpy array"""
        stream = io.BytesIO(x)
        decoded = PIL.Image.open(stream)
        return np.array(decoded)
    
    @staticmethod    
    def PIL_loader(x):
        """Load from a PIL image file object, by converting x to a numpy array with np.array()"""
        return np.array(x)

    def has_loader(self):
        """Return True if a custom loader has been set with `vipy.image.Image.loader`"""
        return self._loader is not None

    
    def load(self, verbose=False):
        """Load image to cached private '_array' attribute.

        The pixel source is resolved in the following order:

        1. Pixels already present: return immediately
        2. A url is set and `hasfilename()` is false: download first
        3. A custom loader is set: call it, then set the colorspace to 'lum', 'rgb' or (after conversion to float32) 'float'
        4. The filename has an image extension: open with PIL and set the colorspace from the channel count and dtype
        5. The filename is webp: hand off to `vipy.video.Video`
        6. The filename has some other extension: raise ValueError
        7. The filename has no extension: try to open it with PIL anyway
        8. No filename but a '__shape' attribute: load a buffer of zeros of this shape, and delete the attribute

        Args:
            verbose: [bool] If true, show additional useful printed output

        Returns:
            This `vipy.image.Image` object with the pixels loaded in self._array as a numpy array.
            NOTE(review): the webp branch returns the loaded `vipy.video.Video` object instead of this object.

        Raises:
            ValueError: if the filename has a non-image extension and no custom loader, or if no pixel source is defined.
            Any exception raised while downloading or decoding is re-raised after resetting self._array to None.

        .. note:: This loader supports any image file format supported by PIL.  A custom loader can be added using `vipy.image.Image.loader`.
        """
        try:
            # Return if previously loaded image
            if self._array is not None:
                return self

            # Download URL to filename 
            if self._url is not None and not self.hasfilename():
                self.download(verbose=verbose)

            # Load filename to numpy array
            if self._loader is not None:
                # Custom loader: f(x) must return the pixel array
                (f,x) = self._loader
                self._array = f(x)
                if self.isluminance():
                    self.colorspace('lum')
                elif self.iscolor():
                    self.colorspace('rgb')
                else:
                    self._array = np.float32(self._array)
                    self.colorspace('float')

            elif isimagefile(self._filename):
                self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
                if self.istransparent():
                    self.colorspace('rgba')  # must be before iscolor()
                elif self.iscolor():
                    self.colorspace('rgb')
                elif self.isgrey():
                    self.colorspace('grey')
                elif self.isluminance():
                    self.colorspace('lum')
                else:
                    log.warning('unknown colorspace for image "%s" - attempting to coerce to colorspace=float' % str(self._filename))
                    self._array = np.float32(self._array)
                    self.colorspace('float')
            elif iswebp(self._filename):
                # Imported here rather than at module level
                import vipy.video
                return vipy.video.Video(self._filename).load()
            elif self.hasfilename() and hasextension(self._filename):
                raise ValueError('Non-standard image extensions require a custom loader')
            elif self.hasfilename():
                # Attempting to open it anyway, may be an image file without an extension. Cross your fingers ...
                self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
            elif not self.hasfilename() and self.hasattribute('__shape'):
                # Loading a previously flushed buffer, load zeros so that we can display superclass objects
                self._array = np.zeros( self.getattribute('__shape') )
                self.delattribute('__shape')
            else:
                raise ValueError('image file not defined')
            
        except IOError:
            # Reset the buffer so that a failed load leaves the image unloaded
            if verbose is True:
                log.error('IO error loading "%s" ' % self.filename())
            self._array = None
            raise

        except KeyboardInterrupt:
            # Propagated without resetting the buffer
            raise

        except Exception:
            if verbose is True:
                log.error('Load error for image "%s"' % self.filename())
            self._array = None
            raise

        return self

    def download(self, timeout=10, verbose=False, cached=False):
        """Download URL to filename provided by constructor, or to temp filename.

        If no filename is set, one is chosen as follows:

        - If `vipy.globals.cache` is set: <cache>/<3 character hash of the url>/<filename from the url without the query string>, with '.jpg' appended if this has no image extension
        - Else if the url is an image url: a temporary image file with the file extension of the url
        - Else: a temporary jpg file

        Args:
            timeout: [int]  The timeout in seconds for an http or https connection attempt.  See also [urllib.request.urlopen](https://docs.python.org/3/library/urllib.request.html).
            verbose: [bool] If true, output more helpful message.
            cached: [bool] If true, use the cached previously downloaded file (if it exists)

        Returns:
            This `vipy.image.Image` object with the URL downloaded to `vipy.image.Image.filename` or to a `vipy.util.tempimage` filename which can be retrieved with `vipy.image.Image.filename`.
            If there is no url but there is a filename, this object is returned unchanged.

        Raises:
            ValueError: if there is no url and no filename, or the url is invalid
            NotImplementedError: if the url scheme is not one of http, https or file
            Any exception raised by the download is re-raised after resetting self._array to None.

        .. note:: For http and https, the optional attributes 'url_sha1', 'url_username' and 'url_password' are forwarded to `vipy.downloader.download`.
        """
        # Nothing to download when only a filename is available
        if self._url is None and self._filename is not None:
            return self
        if self._url is None or not isurl(str(self._url)):
            raise ValueError('[vipy.image.download][ERROR]: '
                             'Invalid URL "%s" ' % self._url)

        # Choose a destination filename if one was not provided
        if self._filename is None:
            if vipy.globals.cache() is not None:
                # There is a potential race condition here when downloading files with common names like "main.jpg", add a (repeatable, hashed) 3 character subdir (<=4096 subdirs for ext3, max ~32K)
                self._filename = os.path.join(remkdir(vipy.globals.cache()), stringhash(self._url, 3), filetail(self._url.split('?')[0]))  # preserve image filename from url
                self._filename = self._filename+'.jpg' if not has_image_extension(self._filename) else self._filename  # guess JPG for URLs with no file extension (e.g. php)
            elif isimageurl(self._url):
                self._filename = tempimage(fileext(self._url))
            else:
                self._filename = tempjpg()  # guess JPG for URLs with no file extension

        # Reuse a previous download if requested
        if cached and self.hasfilename():
            return self
            
        try:
            url_scheme = urllib.parse.urlparse(self._url)[0]
            if url_scheme in ['http', 'https']:
                vipy.downloader.download(self._url,
                                         self._filename,
                                         verbose=verbose,
                                         progress=False,
                                         timeout=timeout,
                                         sha1=self.getattribute('url_sha1'),
                                         username=self.getattribute('url_username'),
                                         password=self.getattribute('url_password'))
            elif url_scheme == 'file':
                # NOTE(review): the source passed to copyfile is the complete url string including the scheme - confirm
                shutil.copyfile(self._url, self._filename)
            elif url_scheme == 's3':
                raise NotImplementedError('see vipy.downloader.s3()')                
            else:
                raise NotImplementedError(
                    'Invalid URL scheme "%s" for URL "%s"' %
                    (url_scheme, self._url))

        except (httplib.BadStatusLine,
                urllib.error.URLError,
                urllib.error.HTTPError):
            if verbose is True:
                log.error('download failed for url "%s"' % self._url)
            self._array = None
            raise

        except IOError:
            if verbose:
                log.error('IO error downloading "%s" -> "%s" ' % (self.url(), self.filename()))
            self._array = None
            raise

        except KeyboardInterrupt:
            # Propagated without resetting the buffer
            raise

        except Exception:
            if verbose:
                log.error('load error for image "%s"' % self.filename())
            self._array = None
            raise

        return self

    def reload(self):
        """Flush the image buffer to force reloading from file or URL.

        Returns:
            The result of loading a flushed clone of this object
        """
        flushed = self.clone(flush=True)
        return flushed.load()

    def isloaded(self):
        """Return True if `vipy.image.Image.load` was successful in reading the image, or if the pixels are present in `vipy.image.Image.array`."""
        return self._array is not None

    def loaded(self):
        """Alias for `vipy.image.Image.isloaded`"""
        return self._array is not None

    def is_loaded(self):
        """Alias for `vipy.image.Image.isloaded`"""
        return self._array is not None
    
    def isdownloaded(self):
        """Does the filename returned from `vipy.image.Image.filename` exist, meaning that the url has been downloaded to a local file?"""
        return self._filename is not None and os.path.exists(self._filename)

    def is_downloaded(self):
        """Alias for `vipy.image.Image.isdownloaded`"""
        return self.isdownloaded()
    
    def downloadif(self, timeout=10, verbose=False):
        """Download URL to filename if the filename has not already been downloaded.

        Images without a url are returned unchanged.  Otherwise this is `vipy.image.Image.download` with cached=True.
        """
        if not self.hasurl():
            return self
        return self.download(timeout=timeout, verbose=verbose, cached=True)

    def try_download(self, timeout=10, verbose=False):
        """Attempt to download URL to filename if the filename has not already been downloaded, return object on failure.  Check `vipy.image.Image.is_downloaded` on returned object for success"""
        try:
            return self.downloadif(timeout=timeout, verbose=verbose)
        except:
            return self

    def try_load(self):
        """Attempt to load an image, return the object on failure.  Check `vipy.image.Image.is_loaded` on returned object for success"""
        try:
            return self.load()
        except:
            return self
        
    def channels(self):
        """Return integer number of color channels"""
        return self.load().channels() if not self.isloaded() else (1 if self._array.ndim==2 else self._array.shape[2])

    def iscolor(self):
        """Color images are three channel or four channel with transparency, float32 or uint8"""
        return self.channels() == 3 or self.channels() == 4

    def istransparent(self):
        """Transparent images are four channel color images with transparency, float32 or uint8.  Return true if this image contains an alpha transparency channel"""
        return self.channels() == 4

    def blend(self, im, alpha):
        """alpha blend self and im, such that self = alpha*self + (1-alpha)*im.

        Args:
            im: [`vipy.image.Image`] the image to blend with.  A clone of im is loaded, converted to the colorspace of self and resized like self before blending.
            alpha: [float] the blending weight for self, in the range [0,1]

        Returns:
            The result of mapping the blend over the loaded pixels of self, cast to uint8
        """
        assert isinstance(im, Image)
        assert 0 <= alpha <= 1
        assert self.colorspace() not in ['float','rgba','bgra'], "convert to rgb first"

        def _mix(arr):
            # The other image is prepared on a clone, so that im is not modified
            other = im.clone().load()._to_colorspace(self.colorspace()).resize_like(self).array()
            return np.uint8(alpha * arr + (1-alpha)*other)

        return self.load().map(_mix)
                
    def isgrey(self):
        """Grey images are one channel, float32"""
        if self.channels() != 1:
            return False
        return self.array().dtype == np.float32

    def isluminance(self):
        """Luminance images are one channel, uint8"""
        if self.channels() != 1:
            return False
        return self.array().dtype == np.uint8

    def filesize(self):
        """Return size of underlying image file in bytes, requires fetching metadata from filesystem"""
        assert self.hasfilename(), 'Invalid image filename'
        return os.stat(self._filename).st_size

    def width(self):
        """Return the width (columns) of the image in integer pixels.

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        pixels = self.load().array()
        return pixels.shape[1]

    def height(self):
        """Return the height (rows) of the image in integer pixels.

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        pixels = self.load().array()
        return pixels.shape[0]
    
    def shape(self):
        """Return the (height, width) or equivalently (rows, cols) of the image.

        Returns:
            A tuple (height=int, width=int) of the image.

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        rows = self.load().height()
        cols = self.width()
        return (rows, cols)

    def aspectratio(self):
        """Return the aspect ratio of the image as (width/height) ratio.

        Returns:
            A float equivalent to (`vipy.image.Image.width` / `vipy.image.Image.height`)

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.load().width() / float(self.height())

    def area(self):
        """Return the area of the image as (width * height).

        Returns:
            An integer equivalent to (`vipy.image.Image.width` * `vipy.image.Image.height`)

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.width()*self.height()
    
    def centroid(self):
        """Return the real valued center pixel coordinates of the image (col=x,row=y).
        
        The centroid is equivalent to half the `vipy.image.Image.shape`.

        Returns:
            A tuple (column, row) of the floating point center of the image.
        """
        return (self.load().width() / 2.0, self.height() / 2.0)

    def centerpixel(self):
        """Return the integer valued center pixel coordinates of the image (col=i,row=j)

        The centerpixel is equivalent to half the `vipy.image.Image.shape` floored to the nearest integer pixel coordinate.

        Returns:
            A tuple (int(column), int(row)) of the integer center of the image.
        """
        c = np.round(self.centroid())
        return (int(c[0]), int(c[1]))
    
    def array(self, np_array=None, copy=False):
        """Replace self._array with provided numpy array

        Args:
            np_array: [numpy array] A new array to use as the pixel buffer for this image.
            copy: [bool] If true, copy the buffer using np.copy(), else use a reference to this buffer.

        Returns:
            - If np_array is not None, return the `vipy.image.Image` object such that this object points to the provided numpy array as the pixel buffer
            - If np_array is None, then return the numpy array.

        .. notes:: 
            - If copy=False, then this `vipy.image.Image` object will share the pixel buffer with the owner of np_array.  Changes to pixels in this buffer will be shared.  
            - If copy=True, then this will significantly slow down processing for large images.  Use referneces wherevery possible.
        """
        if np_array is None:
            return self._array if copy is False else np.copy(self._array)
        elif isnumpyarray(np_array):
            self._array = np.copy(np_array) if copy else np_array  # reference or copy
            assert self._array.dtype == np.float32 or self._array.dtype == np.uint8, "Invalid input - array() must be type uint8 or float32 and not type='%s'" % (str(self._array.dtype))                        
            self.colorspace(None)  # must be set with colorspace() after array() but before _to_colorspace()
            return self
        else:
            raise ValueError('Invalid input - array() must be numpy array and not "%s"' % (str(type(np_array))))

    def fromarray(self, data):
        """Set the pixel buffer to a copy of the provided numpy array.  Alias for `vipy.image.Image.array` with copy=True"""
        return self.array(np_array=data, copy=True)
    
    def tonumpy(self):
        """Alias for `vipy.image.Image.numpy"""
        return self.numpy()

    def numpy(self):
        """Return a mutable numpy array for this `vipy.image.Image`.

        .. notes:: 
            - This will always return a writeable array with the 'WRITEABLE' numpy flag set.  This is useful for returning a mutable numpy array as needed while keeping the original non-mutable numpy array (e.g. loaded from a video or PIL) as the underlying pixel buffer for efficiency reasons.
            - Triggers a `vipy.image.Image.load` if the pixel buffer has not been loaded
            - This will trigger a copy if the ['WRITEABLE' flag](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html) is not set. 
        """        
        self.load()
        self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array  # triggers copy         
        return self._array

    def channel(self, k=None):
        """Return a cloned Image() object for the kth channel, or return an iterator over channels if k=None.

        Iterate over channels as single channel luminance images:

        ```python
        for c in self.channel():
            print(c)
        ```

        Return the kth channel as a single channel luminance image:

        ```python
        c = self.channel(k=0)
        ```

        """
        if k is None:
            return [self.channel(j) for j in range(0, self.channels())]
        elif k == 0 and self.channels() == 1:
            return self
        else:
            assert k < self.channels() and k>=0, "Requested channel=%d must be within valid channels=%d" % (k, self.channels())
            im = self.clone().load()
            im._array = im._array[:,:,k]
            im._colorspace = 'lum'
            return im

    def channelmean(self):
        """Return a cloned Image() object for the mean of all channels followed by returning a single channel float image.

        This is useful for visualizing multichannel images by reducing the channels to one

        ```python
        vipy.image.Image(array=np.random.rand(3,3,16).astype(np.float32)).channelmean().mat2gray().lum().show()
        ```
        
        """
        im = self.clone().load()
        im._array = np.mean(im._array, axis=2, keepdims=True)
        im._colorspace = 'float'
        return im
        
    def red(self):
        """Return red channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':
        
        ```python
        self.red() == self.channel(0) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.red() == self.channel(3) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(0)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(3)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())

    def green(self):
        """Return green channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':

        ```python
        self.green() == self.channel(1) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.green() == self.channel(1) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be three channel color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(1)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(1)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())

    def blue(self):
        """Return blue channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':

        ```python
        self.vlue() == self.channel(2) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.blue() == self.channel(0) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be three channel color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(2)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(0)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())                

    def alpha(self):
        """Return alpha (transparency) channel as a cloned single channel `vipy.image.Image` object"""
        assert self.channels() == 4 and self.colorspace() in ['rgba', 'bgra'], "Must be four channnel color image"
        return self.channel(3)
        
    def zeros(self):
        """Set the pixel buffer to all zeros of the same shape and datatype as this `vipy.image.Image` object.
        
        These are equivalent operations for the resulting buffer shape: 
        
        ```python
        import numpy as np
        np.zeros( (self.width(), self.height(), self.channels()) ) == self.zeros().array()
        ```

        Returns:
           This `vipy.image.Image` object.

        .. note:: Triggers load() if the pixel buffer has not been loaded yet.
        """
        self._array = 0*self.load()._array
        return self

    def pil(self):
        """Convert vipy.image.Image to PIL Image.
        
        Returns:
            A [PIL image](https://pillow.readthedocs.io/en/stable/reference/Image.html) object, that shares the pixel buffer by reference
        """
        if self.isloaded():
            assert self.channels() in [1,3,4] and (self.channels() == 1 or self.colorspace() != 'float'), "Incompatible with PIL"
            return PIL.Image.fromarray(self.numpy(), mode='RGB' if self.colorspace()=='rgb' else None)  # FIXME: mode='RGB' triggers slow tobytes() conversion, need RGBA or RGBX
        elif self.hasfilename():
            return PIL.Image.open(self.filename())
        else:
            return None
            
    def blur(self, sigma=3):
        """Apply a Gaussian blur with Gaussian kernel radius=sigma to the pixel buffer.
        
        Args:
            sigma: [float >=0] The gaussian blur kernel radius.

        Returns:
            This `vipy.image.Image` object with the pixel buffer blurred in place.
        """
        assert sigma >= 0
        return self.array(np.array(self.pil().filter(PIL.ImageFilter.GaussianBlur(radius=sigma)))) if sigma>0 else self
        
    def torch(self, order='CHW'):
        """Convert the batch of 1 HxWxC images to a CxHxW torch tensor.

        Args:
            order: ['CHW', 'HWC', 'NCHW', 'NHWC'].  The axis order of the torch tensor (channels, height, width) or (height, width, channels) or (1, channels, height, width) or (1, height, width, channels)

        Returns:
            A CxHxW or HxWxC or 1xCxHxW or 1xHxWxC [torch tensor](https://pytorch.org/docs/stable/tensors.html) that shares the pixel buffer of this image object by reference.

        .. note:: This supports numpy types and does not support bfloat16
        """
        from torch import from_numpy;  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow)
        
        assert order in ['CHW', 'HWC', 'NCHW', 'NHWC']
        img = self.numpy() if self.array().ndim >= 3 else np.expand_dims(self.array(), 2)  # HxW -> HxWx1 
        
        if order in ['CHW']:
            assert img.ndim == 3, "invalid array"  
            img = img.transpose(2,0,1) # HxWxC -> CxHxW
        elif order in ['NCHW']:
            img = img.transpose(3,2,0,1) if img.ndim == 4 else np.expand_dims(img.transpose(2,0,1), 0)
        if order in ['NHWC']:
            img = img.transpose(3,0,1,2) if img.ndim == 4 else np.expand_dims(img, 0)
        return from_numpy(img)   # pip install torch

    
    @staticmethod
    def from_torch(x, order='CHW'):
        """Convert a 1xCxHxW, CxHxW or NxCxHxW torch tensor (or numpy array with torch channel order) to HxWxC numpy array, returns new `vipy.image.Image` with inferred colorspace corresponding to data type in x"""
        from torch import Tensor, is_tensor;  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow) 
        assert isinstance(x, Tensor) or isinstance(x, np.ndarray), "Invalid input type '%s'- must be torch.Tensor" % (str(type(x)))
        assert x.ndim == 4 or x.ndim == 3, "Torch tensor must be shape 1xCxHxW, CxHxW, or NxCxHxW"
        x = x.squeeze(0) if (x.ndim == 4 and x.shape[0] == 1) else x

        if order == 'CHW':
            x = x.permute(1,2,0).cpu().detach().float().numpy() if is_tensor(x) else np.copy(x).transpose(1,2,0)   # CxHxW -> HxWxC, copied            
        elif order == 'WHC':
            x = x.permute(1,0,2).cpu().detach().float().numpy() if is_tensor(x) else np.copy(x).transpose(1,0,2)   # WxHxC -> HxWxC, copied        
        elif order == 'HWC':
            x = x.cpu().detach().float().numpy() if is_tensor(x) else np.copy(x)  # HxWxC -> HxWxC, copied        
        elif order == 'NCHW':
            assert x.ndim == 4, "invalid shape"
            x = x.permute(2,3,1,0).cpu().detach().float().numpy()  # NxCxHxW -> HxWxCxN, copied        
        else:
            raise ValueError('unknown axis order "%s"' % order)

        img = x
        colorspace = 'float' if img.dtype == np.float32 else None
        colorspace = 'rgb' if img.dtype == np.uint8 and img.shape[2] == 3 else colorspace  # assumed
        colorspace = 'lum' if img.dtype == np.uint8 and img.shape[2] == 1 else colorspace
        return Image(array=img, colorspace=colorspace)

    @staticmethod
    def fromtorch(x, order='CHW'):
        """Alias for `vipy.image.Image.from_torch`"""
        return Image.from_torch(x, order)
    
    def unload(self):
        """Remove cached file and loaded array.  Note that this will delete the underlying file returned by filename() if there is a backing url, cleaning up cached files and forcing re-download"""
        if self.hasurl() and self.hasfilename():
            log.info('Removing "%s"'% self._filename)
            os.remove(self._filename)
            self._filename = None
        if self.isloaded():
            self.flush()
        return self

    def uncache(self):
        """Alias for `vipy.image.Image.unload`"""
        return self.unload()
    
    def filename(self, newfile=None):
        """Return or set image filename"""
        if newfile is None:
            return self._filename
        else:
            self._filename = newfile
            return self

    def clear_filename(self):
        """Remove the current filename from the object in-place and return the object"""        
        self._filename = None
        return self
    
    def url(self, url=None, username=None, password=None, sha1=None):
        """Image URL and URL download properties"""
        if url is not None:
            self._url = url  # this does not change anything else (e.g. the associated filename), better to use constructor 
        if username is not None:
            self.setattribute('url_username', username)
        if password is not None:
            self.setattribute('url_password', password)
        if sha1 is not None:
            self.setattribute('url_sha1', sha1)
        if url is None and username is None and password is None and sha1 is None:
            return self._url
        else:
            return self
    
    def colorspace(self, colorspace=None):
        """Return or set the colorspace as ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']"""
        if colorspace is None:
            return self._colorspace
        else:
            assert str(colorspace).lower() in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s'. Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum']" % colorspace 
            img = self.array()
            if self.isloaded():
                colorspace = str(colorspace).lower()
                if self.array().dtype == np.float32:
                    assert colorspace in ['float', 'grey', 'gray'], "Invalid colorspace '%s' for float32 array()" % colorspace
                elif self.array().dtype == np.uint8:
                    assert colorspace in  ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum'], "Invalid colorspace '%s' for uint8 array(). Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum']" % colorspace
                else:
                    raise ValueError('unupported array() datatype "%s".  Allowable is [np.float32, np.uint8]' % colorspace)  # should never get here as long as array() is used to set _array
                if self.channels() == 1:
                    assert colorspace in ['float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s; for single channel array.  Allowable is ['float', 'grey', 'gray', 'lum']" % colorspace
                elif self.channels() == 3:
                    assert colorspace in ['float', 'rgb', 'bgr', 'hsv'], "Invalid colorspace '%s; for three channel array. Allowable is ['float', 'rgb', 'bgr', 'hsv']" % colorspace
                elif self.channels() == 4:
                    assert colorspace in ['float', 'rgba', 'bgra'], "Invalid colorspace '%s; for four channel array. Allowable is ['float', 'rgba', 'bgra']" % colorspace                    
                elif colorspace != 'float':
                    raise ValueError("Invalid colorspace '%s' for image channels=%d, type=%s" % (colorspace, self.channels(), str(self.array().dtype)))
                if colorspace in ['grey', 'gray']:
                    assert self.max() <= 1 and self.min() >= 0, "Colorspace 'grey' image must be np.float32 in range [0,1].  Use colorspace 'lum' for np.uint8 in range [0,255], or colorspace 'float' for unconstrained np.float32 [-inf, +inf]"
                    colorspace = 'grey'  # standardize
            self._colorspace = str(colorspace).lower()
            return self

    def uri(self):
        """Return the URI of the image object, either the URL or the filename, raise exception if neither defined"""
        if self.hasurl():
            return self.url()
        elif self.hasfilename():
            return self.filename()
        else:
            raise ValueError('No URI defined')

    def set_attribute(self, key, value):
        """Set element self.attributes[key]=value"""
        if self.attributes is None:
            self.attributes = {key: value}
        else:
            self.attributes[key] = value
        return self
    
    def setattribute(self, key, value):
        return self.set_attribute(key, value)
        
    def setattributes(self, newattr):
        """Set many attributes at once by providing a dictionary to be merged with current attributes"""
        assert isinstance(newattr, dict), "New attributes must be dictionary"
        self.attributes.update(newattr)
        return self
    
    def getattribute(self, k):
        """Return the key k in the attributes dictionary (self.attributes) if present, else None"""        
        return self.get_attribute(k)

    def get_attribute(self, k):
        """Return the key k in the attributes dictionary (self.attributes) if present, else None"""        
        return self.attributes[k] if k in self.attributes else None        
    
    def clear_attributes(self):
        self.attributes = {}
        return self
    
    def hasattribute(self, key):
        return self.attributes is not None and key in self.attributes

    def delattribute(self, k):
        return self.del_attribute(k)
    
    def del_attribute(self, k):
        if k in self.attributes:
            self.attributes.pop(k)
        return self
        
    def delattributes(self, atts):
        """Remove every key in atts (converted with tolist) from the attributes dictionary, and return this object"""
        for key in tolist(atts):
            self.delattribute(key)
        return self

    def append_attribute(self, key, value):
        """Append the value to attribute key, creating the key as an empty list if it does not exist"""
        if key not in self.attributes:
            self.attributes[key] = []
        self.attributes[key].append(value)
        return self
    
    def metadata(self, k=None):
        """Return metadata associated with this image, stored in the attributes dictionary"""
        return self.attributes if k is None else self.getattribute(k)
    
    def hasurl(self):
        """synonym for `vipy.image.has_url`"""
        return self.has_url()

    def has_url(self):
        """Return True if the image has a URL input source"""
        return self._url is not None
    
    def has_filename(self):
        """Return True if the image has a filename input source and this file exists"""
        return self._filename is not None and os.path.exists(self._filename)

    def hasfilename(self):
        """synonym for has_filename"""
        return self.has_filename()
    
    def clone(self, flushforward=False, flushbackward=False, flush=False, shallow=False, attributes=False, dereference=False):
        """Create deep copy of object, flushing the original buffer if requested and returning the cloned object.
        Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned 
        object which can be used for encoding and will be garbage collected.
        
            * flushforward: copy the object, and set the cloned object array() to None.  This flushes the video buffer for the clone, not the object
            * flushbackward:  copy the object, and set the object array() to None.  This flushes the video buffer for the object, not the clone.
            * flush:  set the object array() to None and clone the object.  This flushes the video buffer for both the clone and the object.
            * shallow: create a shallow copy (copy.copy) instead of a deep copy, where the clone references the pixel buffer of this object.  Only used if none of the flush options are set.
            * attributes: replace the attributes of the clone with a deep copy of the attributes of this object.  This is useful with shallow=True, where the shallow copy would otherwise reference the same attributes.
            * dereference: remove both the filename and URL (if present) in the cloned object, leaving only the buffer

        Returns:
            The cloned `vipy.image.Image` object.

        .. note:: The flush options take precedence in the order (flush or flushforward+flushbackward), flushbackward, flushforward, and then shallow.
        """
        if flush or (flushforward and flushbackward):
            self.flush()  # flushes buffer on object and clone
            im = copy.deepcopy(self)  # object and clone are flushed
        elif flushbackward:
            im = copy.deepcopy(self)  # propagates _array to clone
            self.flush()  # object flushed, clone not flushed
        elif flushforward:            
            # temporarily detach the buffer so that deepcopy does not copy the pixels, then restore it on this object
            array = self._array;
            self._array = None
            im = copy.deepcopy(self)   # does not propagate _array to clone
            self._array = array    # object not flushed
            im.flush()
        elif shallow:
            im = copy.copy(self)  # shallow copy
            im._array = np.asarray(self._array) if self._array is not None else None  # shared pixels            
        else:
            im = copy.deepcopy(self)
        if attributes:
            im.attributes = copy.deepcopy(self.attributes)
        if dereference:
            # without a filename or URL the buffer is the only remaining source of pixels, so it must be present
            assert im._array is not None, "image buffer required"
            im._filename = None
            im._url = None
        return im

    def flush(self):
        """flush the image buffer in place, alias for self.clone(flush=True)"""        
        if not (self.hasfilename() or self.hasurl()):
            self.setattribute('__shape', (self.height(), self.width(), self.channels()))  # to load zeros
        self._array = None  # flushes buffer on object
        return self

        
    # Spatial transformations
    def resize(self, cols=None, rows=None, width=None, height=None, interp='bilinear', fast=False):
        """Resize the image buffer to (rows x cols) with bilinear interpolation.  If rows or cols is provided, rescale image maintaining aspect ratio"""
        assert not (cols is not None and width is not None), "Define either width or cols"
        assert not (rows is not None and height is not None), "Define either height or rows"
        rows = rows if height is None else height
        cols = cols if width is None else width
        if cols is None or rows is None:
            if cols is None:
                scale = float(rows) / float(self.height())
            else:
                scale = float(cols) / float(self.width())
            self.rescale(scale)
        elif rows == self.height() and cols == self.width():
            return self  
        elif self.colorspace() == 'float':
            self._array = np.dstack([np.array(im.pil().resize((cols, rows), string_to_pil_interpolation(interp))) for im in self.channel()])
        else:
            self._array = np.asarray(self.load().pil().resize((cols, rows), string_to_pil_interpolation(interp), reducing_gap=2 if fast else None))  
        return self

    def resize_like(self, im, interp='bilinear'):
        """Resize image buffer to be the same size as the provided vipy.image.Image()"""
        assert isinstance(im, Image), "Invalid input - Must be vipy.image.Image"
        return self.resize(im.width(), im.height(), interp=interp)
    
    def rescale(self, scale=1, interp='bilinear', fast=False):
        """Scale the image buffer by the given factor - NOT idempotent"""
        (height, width) = self.load().shape()
        if scale == 1:
            return self
        elif self.colorspace() == 'float':
            self._array = np.dstack([np.asarray(im.pil().resize((int(np.round(scale * width)), int(np.round(scale * height))), string_to_pil_interpolation(interp))) for im in self.channel()])
        else: 
            self._array = np.asarray(self.pil().resize((int(np.round(scale * width)), int(np.round(scale * height))), string_to_pil_interpolation(interp), reducing_gap=2 if fast else None))
        return self

    def maxdim(self, dim=None, interp='bilinear'):
        """Resize image preserving aspect ratio so that maximum dimension of image = dim, or return maxdim()"""
        return self.rescale(float(dim) / float(np.maximum(self.height(), self.width())), interp=interp) if dim is not None else max(self.shape())

    def mindim(self, dim=None, interp='bilinear'):
        """Resize image preserving aspect ratio so that minimum dimension of image = dim, or return mindim()"""
        if dim is None:
            return np.minimum(self.height(), self.width())
        else:
            s = float(dim) / float(np.minimum(self.height(), self.width()))
            return self.rescale(s, interp=interp) if dim is not None else min(self.shape())

    def mindimn(self, dim=None):
        """Frequently used shortcut for mindim(dim, interp='nearest')"""
        return self.mindim(dim, interp='nearest')
    
    def _pad(self, dx, dy, mode='edge'):
        """Pad image using np.pad mode, dx=padwidth, dy=padheight, thin wrapper for numpy.pad"""
        self._array = np.pad(self.load().array(),
                             ((dy, dy), (dx, dx), (0, 0)) if
                             self.load().array().ndim == 3 else ((dy, dy), (dx, dx)),
                             mode=mode)
        return self

    def pad(self, padwidth, padheight):
        """Alias for `vipy.image.Image.zeropad`"""
        return self.zeropad(padwidth, padheight)
    
    def zeropad(self, padwidth, padheight):
        """Pad image using np.pad constant by adding padwidth on both left and right , or padwidth=(left,right) for different pre/postpadding,, and padheight on top and bottom or padheight=(top,bottom) for different pre/post padding"""
        if not isinstance(padwidth, tuple):
            padwidth = (padwidth, padwidth)
        if not isinstance(padheight, tuple):
            padheight = (padheight, padheight)
        if self.channels() > 1 or self._array.ndim == 3:
            pad_shape = (padheight, padwidth, (0, 0))
        else:
            pad_shape = (padheight, padwidth)

        assert all([x>=0 for x in padheight]) and all([x>=0 for x in padwidth]), "padding must be positive"
        if padwidth[0]>0 or padwidth[1]>0 or padheight[0]>0 or padheight[1]>0:
            self._array = np.pad(self.load().array(), pad_width=pad_shape, mode='constant', constant_values=0)  # this is still slow due to the required copy, but fast-ish in np >= 1.17
            
        return self

    def zeropadlike(self, width, height):
        """Zero pad the image balancing the border so that the resulting image size is (width, height)"""
        assert width >= self.width() and height >= self.height(), "Invalid input - final (width=%d, height=%d) must be greater than current image size (width=%d, height=%d)" % (width, height, self.width(), self.height())
        return self.zeropad( (int(np.floor((width - self.width())/2)), int(np.ceil((width - self.width())/2))),
                             (int(np.floor((height - self.height())/2)), int(np.ceil((height - self.height())/2))))
                            
    def meanpad(self, padwidth, padheight, mu=None):
        """Pad image using np.pad constant=image mean by adding padwidth on both left and right , or padwidth=(left,right) for different pre/postpadding,, and padheight on top and bottom or padheight=(top,bottom) for different pre/post padding

        Args:
            padwidth: [int or tuple] The padding on both left and right, or a tuple (left, right)
            padheight: [int or tuple] The padding on both top and bottom, or a tuple (top, bottom)
            mu: [iterable] The constant padding value for each channel.  If None, this is the result of self.meanchannel().

        Returns:
            This object with the pixel buffer replaced by the padded channels stacked along the third axis, with singleton dimensions squeezed.
        """        
        if not isinstance(padwidth, tuple):
            padwidth = (padwidth, padwidth)
        if not isinstance(padheight, tuple):
            padheight = (padheight, padheight)
        assert all([x>=0 for x in padheight]) and all([x>=0 for x in padwidth]), "padding must be positive"
        mu = self.meanchannel() if mu is None else mu
        # each channel is padded independently with its own constant value, and channels are paired with mu using zip (truncating to the shorter of the two)
        # NOTE(review): self.channel() returns image objects rather than numpy arrays, so np.pad presumably relies on these objects being convertible to arrays - TODO confirm
        self._array = np.squeeze(np.dstack([np.pad(img,
                                                   pad_width=(padheight,padwidth),
                                                   mode='constant',
                                                   constant_values=c) for (img,c) in zip(self.channel(), mu)]))
        return self

    def alphapad(self, padwidth, padheight):
        """Pad image using alpha transparency by adding padwidth on both left and right , or padwidth=(left,right) for different pre/postpadding,, and padheight on top and bottom or padheight=(top,bottom) for different pre/post padding"""
        assert self.colorspace() == 'rgba', "Colorspace must be RGBA for padding with transparency"
        return self.meanpad(padwidth, padheight, mu=np.array([0,0,0,0]))
    
    def minsquare(self):
        """Crop image of size (HxW) to (min(H,W), min(H,W)), anchored at the upper left corner"""
        side = int(np.min(self.load().shape()))
        square = BoundingBox(xmin=0, ymin=0, width=side, height=side)
        return self._crop(square)

    def maxsquare(self, S=None):
        """Crop image of size (HxW) to (max(H,W), max(H,W)) with zeropadding or (S,S) if provided, keeping upper left corner constant"""
        S = np.max(self.load().shape()) if S is None else int(S)
        (H, W) = self.shape()
        (dW, dH) = (max(0, S - W), max(0, S - H))
        if S != W or S != H:
            self._crop(BoundingBox(0, 0, width=min(W, S), height=min(H, S)))
            if (dW > 0 or dH > 0):
                self.zeropad((0,dW), (0,dH))  # crop then zeropad
        return self

    def maxmatte(self):
        """Crop image of size (HxW) to (max(H,W), max(H,W)) with balanced zeropadding forming a letterbox with top/bottom matte or pillarbox with left/right matte"""
        S = np.max(self.load().shape())
        dW = S - self.width()
        dH = S - self.height()
        return self.zeropad((int(np.floor(dW//2)), int(np.ceil(dW//2))), (int(np.floor(dH//2)), int(np.ceil(dH//2))))._crop(BoundingBox(0, 0, width=int(S), height=int(S)))
    
    def centersquare(self):
        """Crop a square of size (NxN) about the image center, such that N=min(width,height), keeping the image centroid constant"""
        N = int(np.min(self.shape()))
        xcenter = float(self.width() / 2.0)
        ycenter = float(self.height() / 2.0)
        return self._crop(BoundingBox(xcentroid=xcenter, ycentroid=ycenter, width=N, height=N))

    def centercrop(self, height, width):
        """Crop a box of size (height x width) about the image center, keeping the image centroid constant"""
        xcenter = float(self.width() / 2.0)
        ycenter = float(self.height() / 2.0)
        return self._crop(BoundingBox(xcentroid=xcenter, ycentroid=ycenter, width=int(width), height=int(height)))

    def cornercrop(self, height, width):
        """Crop a (height x width) window anchored at the upper left corner of the image.

        Args:
            height [int]: The height of the crop window in pixels
            width [int]: The width of the crop window in pixels

        Returns:
            This image object, cropped in place
        """
        cols = int(width)
        rows = int(height)
        window = BoundingBox(xmin=0, ymin=0, width=cols, height=rows)
        return self._crop(window)
    
    def _crop(self, bbox):
        """Crop the image buffer to the supplied bounding box, after clipping the box to the image rectangle.

        Args:
            bbox [`vipy.geometry.BoundingBox`]: The region to keep

        Returns:
            This image object.  If the box is degenerate or does not overlap the image, a warning is logged and the buffer is left unchanged.
        """
        assert isinstance(bbox, BoundingBox) and bbox.valid(), "Invalid input - Must be vipy.geometry.BoundingBox not '%s'" % (str(type(bbox)))
        intersects = (not bbox.isdegenerate()) and bbox.hasoverlap(self.load().array())
        if not intersects:
            log.warning('BoundingBox for crop() does not intersect image rectangle')
            return self
        clipped = bbox.imclip(self.load().array()).int()
        (rows, cols) = (slice(clipped.ymin(), clipped.ymax()), slice(clipped.xmin(), clipped.xmax()))
        self._array = self.array()[rows, cols]
        return self

    def crop(self, bbox):
        """Crop the image buffer using the supplied bounding box.  Public entry point that delegates to _crop()"""
        cropped = self._crop(bbox)
        return cropped
    
    def fliplr(self):
        """Mirror the image buffer left-to-right (about the vertical axis) and return this image.  Not idempotent."""
        pixels = self.load().array()
        self._array = np.fliplr(pixels)
        return self

    def flipud(self):
        """Mirror the image buffer top-to-bottom (about the horizontal axis) and return this image.  Not idempotent."""
        pixels = self.load().array()
        self._array = np.flipud(pixels)
        return self
    
    def imagebox(self):
        """Return a `vipy.geometry.BoundingBox` covering the full image rectangle, with origin at the upper left"""
        cols = int(self.width())
        rows = int(self.height())
        return BoundingBox(xmin=0, ymin=0, width=cols, height=rows)

    def border_mask(self, pad):
        """Return a binary uint8 mask the same size as this image, with ones in a border of pad pixels around the edge.

        Args:
            pad [int]: The border thickness in pixels.  A non-positive pad returns an all-zero mask.

        Returns:
            A (height x width) np.uint8 array with value one on the border and zero in the interior
        """
        img = np.zeros((self.height(), self.width()), dtype=np.uint8)
        if pad > 0:
            # guard required: the slice -0: selects the whole axis, which would mark every pixel as border
            img[0:pad, :] = 1
            img[-pad:, :] = 1
            img[:, 0:pad] = 1
            img[:, -pad:] = 1
        return img
    
    # Color conversion
    def _to_colorspace(self, to):
        """Convert the image buffer in place to the requested colorspace and return this image.

        Supported colorspaces are rgb, rgba, bgr, bgra, hsv, grey (alias gray), lum, float.

        Conversions without a direct path are routed through an intermediate colorspace: the buffer is converted
        to RGB (or RGBA when starting from BGRA), the colorspace tag is updated, and this method recurses to reach the target.

        Args:
            to [str]: The target colorspace

        Returns:
            This image object with the buffer converted and the colorspace tag set to the target

        Raises:
            ValueError: If the current colorspace is None or unsupported, or if a float image does not have a supported number of channels
        """
        to = to if to != 'gray' else 'grey'  # standardize 'gray' -> 'grey' internally
        self.load()
        if self.colorspace() == to:
            return self
        elif to == 'float':
            img = self.load().array()  # any type
            self._array = np.array(img).astype(np.float32)  # typecast to float32
        elif self.colorspace() == 'lum':
            img = self.load().array()  # single channel, uint8 [0,255]
            assert img.dtype == np.uint8
            img = np.squeeze(img, axis=2) if img.ndim == 3 and img.shape[2] == 1 else img  # remove singleton channel
            self._array = np.array(PIL.Image.fromarray(img, mode='L').convert('RGB'))  # uint8 luminance [0,255] -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() in ['gray', 'grey']:
            img = self.load().array()  # single channel float32 [0,1]
            img = np.squeeze(img, axis=2) if img.ndim == 3 and img.shape[2] == 1 else img  # remove singleton channel
            self._array = np.array(PIL.Image.fromarray(255.0 * img, mode='F').convert('RGB'))  # float32 gray [0,1] -> float32 gray [0,255] -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'rgba':
            img = self.load().array()  # uint8 RGBA
            if to == 'bgra':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGBA -> uint8 ABGR
                self._array = self._array[:,:,[1,2,3,0]]  # uint8 ABGR -> uint8 BGRA
            elif to == 'rgb':
                self._array = self._array[:,:,0:-1]  # uint8 RGBA -> uint8 RGB
            else:
                self._array = self._array[:,:,0:-1]  # uint8 RGBA -> uint8 RGB
                self.colorspace('rgb')
                self._to_colorspace(to)
        elif self.colorspace() == 'rgb':
            img = self.load().array()  # uint8 RGB
            if to in ['grey', 'gray']:
                self._array = (1.0 / 255.0) * np.array(PIL.Image.fromarray(img).convert('L')).astype(np.float32)  # uint8 RGB -> float32 Grey [0,255] -> float32 Grey [0,1]
            elif to == 'bgr':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGB -> uint8 BGR
            elif to == 'hsv':
                self._array = np.array(PIL.Image.fromarray(img).convert('HSV'))  # uint8 RGB -> uint8 HSV
            elif to == 'lum':
                self._array = np.array(PIL.Image.fromarray(img).convert('L'))  # uint8 RGB -> uint8 Luminance (integer grey)
            elif to == 'rgba':
                self._array = np.dstack((img, 255*np.ones((img.shape[0], img.shape[1]), dtype=np.uint8)))  # uint8 RGB -> uint8 RGBA, fully opaque
            elif to == 'bgra':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGB -> uint8 BGR
                # The new alpha channel is fully opaque (255) so that RGB->BGRA agrees with RGB->RGBA.
                self._array = np.dstack((self._array, 255*np.ones((img.shape[0], img.shape[1]), dtype=np.uint8)))  # uint8 BGR -> uint8 BGRA
        elif self.colorspace() == 'bgr':
            img = self.load().array()  # uint8 BGR
            self._array = np.array(img)[:,:,::-1]  # uint8 BGR -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'bgra':
            img = self.load().array()  # uint8 BGRA
            self._array = np.array(img)[:,:,::-1]  # uint8 BGRA -> uint8 ARGB
            self._array = self._array[:,:,[1,2,3,0]]  # uint8 ARGB -> uint8 RGBA
            self.colorspace('rgba')
            self._to_colorspace(to)
        elif self.colorspace() == 'hsv':
            img = self.load().array()  # uint8 HSV
            self._array = np.array(PIL.Image.fromarray(img, mode='HSV').convert('RGB'))  # uint8 HSV -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'float':
            img = self.load().array()  # float32
            if np.max(img) > 1 or np.min(img) < 0:
                # out of range float images are rescaled into float32 [0,1] before conversion
                img = self.mat2gray().array()
            if not self.channels() in [1,2,3]:
                raise ValueError('Float image must be single channel or three channel RGB in the range float32 [0,1] prior to conversion')
            if self.channels() == 3:  # assumed RGB
                self._array = np.uint8(255 * self.array())   # float32 RGB [0,1] -> uint8 RGB [0,255]
                self.colorspace('rgb')
            else:
                img = np.squeeze(img, axis=2) if img.ndim == 3 else img
                self._array = (1.0 / 255.0) * np.array(PIL.Image.fromarray(np.uint8(255 * img)).convert('L')).astype(np.float32)  # float32 [0,1] -> uint8 [0,255] -> float32 gray [0,1]
                self.colorspace('grey')
            self._to_colorspace(to)
        elif self.colorspace() is None:
            raise ValueError('Colorspace must be initialized by constructor or colorspace() to allow for colorspace conversion')
        else:
            raise ValueError('unsupported colorspace "%s"' % self.colorspace())

        self.colorspace(to)
        return self

    def affine_transform(self, A, border='zero'):
        """Apply a 3x3 affine geometric transformation to the image. 

        Args:        
            - A [np.ndarray]: 3x3 affine geometric transform from `vipy.geometry.affine_transform`
            - border [str]:  'zero' or 'replicate' to handle elements outside the image rectangle after transformation

        Returns:
            - This object with only the array transformed

        .. note:: The image will be loaded and converted to float() prior to applying the affine transformation.  
        .. note:: This will transform only the pixels, not objects
        """
        # The transform must be a numpy array.  A non-numpy input must fail this assertion rather than raise an unrelated error.
        assert isnumpy(A), "invalid input"
        assert A.shape == (3,3), "The affine transformation matrix should be the output of vipy.geometry.affine_transformation"
        self._array = vipy.geometry.imtransform(self.load().float().array(), A.astype(np.float32), border=border)
        return self

    def rotate(self, r):
        """Rotate the pixels by r radians about the origin in the upper left corner.

        Args:
            r [float]: The rotation angle in radians

        Returns:
            This image object with the buffer transformed by `vipy.image.Image.affine_transform`
        """
        rotation = vipy.geometry.affine_transform(r=r)
        return self.affine_transform(rotation)

    def rotate_by_exif(self):
        """Apply the flip/rotation specified by the 'Orientation' field of the EXIF metadata.

        Returns:
            This image object.  The buffer is unchanged if the orientation is missing or equal to one.

        Raises:
            ValueError: If the orientation is not one of the values 1-8
        """
        exif = self.exif()
        orientation = exif['Orientation'] if 'Orientation' in exif else None
        if orientation is None or orientation == 1:
            return self

        # EXIF orientation -> ordered sequence of buffer operations to apply
        operations = {2: ('fliplr',),
                      3: ('flipud', 'fliplr'),
                      4: ('flipud',),
                      5: ('rot90cw', 'fliplr'),
                      6: ('rot90cw',),
                      7: ('rot90ccw', 'fliplr'),
                      8: ('rot90ccw',)}
        if orientation not in operations:
            raise ValueError
        im = self
        for op in operations[orientation]:
            im = getattr(im, op)()
        return im
    
    def rgb(self):
        """Return this image after converting the buffer to three channel uint8 RGB"""
        converted = self._to_colorspace('rgb')
        return converted

    def color_transform(self, colorspace):
        """Return this image after converting the buffer from the current `vipy.image.Image.colorspace` to the provided colorspace"""
        converted = self._to_colorspace(colorspace)
        return converted
    
    def colorspace_like(self, im):
        """Return this image after converting the buffer to the colorspace of the provided `vipy.image.Image`"""
        assert isinstance(im, vipy.image.Image)
        target = im.colorspace()
        return self._to_colorspace(target)
    
    def rgba(self):
        """Return this image after converting the buffer to four channel uint8 RGBA"""
        converted = self._to_colorspace('rgba')
        return converted

    def hsv(self):
        """Return this image after converting the buffer to three channel uint8 HSV"""
        converted = self._to_colorspace('hsv')
        return converted

    def bgr(self):
        """Return this image after converting the buffer to three channel uint8 BGR"""
        converted = self._to_colorspace('bgr')
        return converted

    def bgra(self):
        """Return this image after converting the buffer to four channel uint8 BGRA"""
        converted = self._to_colorspace('bgra')
        return converted

    def float(self):
        """Return this image after typecasting the buffer to float32"""
        converted = self._to_colorspace('float')
        return converted

    def greyscale(self):
        """Return this image after converting the buffer to single channel float32 greyscale in the range [0,1]"""
        converted = self._to_colorspace('gray')
        return converted

    def grayscale(self):
        """Alternate spelling of greyscale(), delegates to it unchanged"""
        converted = self.greyscale()
        return converted

    def grey(self):
        """Short alias for greyscale(), delegates to it unchanged"""
        converted = self.greyscale()
        return converted

    def gray(self):
        """Short alternate-spelling alias for greyscale(), delegates to it unchanged"""
        converted = self.greyscale()
        return converted

    def luminance(self):
        """Return this image after converting the buffer to single channel uint8 luminance in the range [0,255]"""
        converted = self._to_colorspace('lum')
        return converted

    def lum(self):
        """Short alias for luminance(): convert the buffer to single channel uint8 luminance"""
        converted = self._to_colorspace('lum')
        return converted

    def _apply_colormap(self, cm):
        """Convert the image to greyscale, then map it through a matplotlib colormap to produce a uint8 RGB image.

        See https://matplotlib.org/tutorials/colors/colormaps.html for the available colormaps.

        Args:
            cm [str]: The name of the matplotlib colormap

        Returns:
            This image object with an RGB buffer
        """
        colormap = plt.get_cmap(cm)
        grey = self.grey().numpy()
        mapped = colormap(grey)
        self._array = np.uint8(255 * mapped[:, :, :3])  # keep the first three channels of the colormap output
        self.colorspace('rgb')
        return self

    def jet(self):
        """Return this image after mapping its greyscale values through the 'jet' colormap into RGB"""
        colored = self._apply_colormap('jet')
        return colored

    def rainbow(self):
        """Return this image after mapping its greyscale values through the 'gist_rainbow' colormap into RGB"""
        colored = self._apply_colormap('gist_rainbow')
        return colored

    def hot(self):
        """Return this image after mapping its greyscale values through the 'hot' colormap into RGB"""
        colored = self._apply_colormap('hot')
        return colored

    def bone(self):
        """Return this image after mapping its greyscale values through the 'bone' colormap into RGB"""
        colored = self._apply_colormap('bone')
        return colored

    def saturate(self, min, max):
        """Clip the image buffer elementwise into the range [min,max].

        Args:
            min: The lower bound, with type compatible with the buffer
            max: The upper bound, with type compatible with the buffer

        Returns:
            The result of setting the clipped buffer with array()
        """
        raised = np.maximum(self.load().array(), min)  # apply the lower bound first
        clipped = np.minimum(raised, max)  # then the upper bound
        return self.array(clipped)

    def intensity(self):
        """Convert the image to float32 and rescale so that [min,max] maps to [0,1], forcing the colorspace to 'float'.

        A constant image (max == min) has no dynamic range and is mapped to all zeros.

        Returns:
            This image object with a float32 buffer in the range [0,1]
        """
        img = self.load().float().array()
        (lo, hi) = (float(self.min()), float(self.max()))
        span = hi - lo
        # subtract the minimum before dividing by the range: (img - min) / (max - min)
        self.array((img - lo) / span if span > 0 else np.zeros_like(img))
        return self.colorspace('float')

    def mat2gray(self, min=None, max=None):
        """Rescale the image buffer so that [min,max] -> [0,1], forcing conversion to 'float' colorspace.

        This does not change the number of color channels.

        Args:
            min: Optional lower bound passed through to `vipy.util.mat2gray`
            max: Optional upper bound passed through to `vipy.util.mat2gray`

        Returns:
            The result of setting the colorspace to 'float' after replacing the buffer
        """
        self.array(mat2gray(np.float32(self.load().float().array()), min, max))
        return self.colorspace('float')

    def sum_to_one(self, eps=1E-6):
        """Return a float image in the range [0,1] whose elements sum to (approximately) one.

        Args:
            eps [float]: A small constant added to the sum to avoid division by zero

        Returns:
            The result of applying gain() with the reciprocal of the rescaled image sum
        """
        total = self.mat2gray().sum()
        return self.gain(1.0 / (eps + total))
    
    def gain(self, g):
        """Elementwise multiply the image buffer by a gain, forcing the colorspace to 'float'.

        Args:
            g: A scalar or an array broadcastable to array().  A scalar gain of exactly one is a no-op.

        Returns:
            This image object.  The buffer and colorspace are untouched for unity scalar gain.

        .. note:: This uses the native numpy multiply, which is faster than a numba optimized kernel
        """
        if np.isscalar(g) and g == 1:
            return self
        # array valued gain must not be compared with "!=" in a boolean context, so it always takes this path
        return self.array(np.multiply(self.load().float().array(), g)).colorspace('float')

    def bias(self, b):
        """Add a bias to the image buffer, forcing the colorspace to 'float'.

        Args:
            b: A scalar or an array broadcastable to array()

        Returns:
            The result of setting the colorspace to 'float' after replacing the buffer
        """
        shifted = self.load().float().array() + b
        self.array(shifted)
        return self.colorspace('float')

    def normalize(self, gain, bias):
        """Apply a multiplicative gain and an additive bias, such that the new array() == gain*array() + bias.

        This is useful for applying a normalization of an image prior to calling `vipy.image.Image.torch`.

        The following operations produce the same buffer:

        ```python
        im = vipy.image.RandomImage()
        im.clone().normalize(1/255.0, -0.5)
        im.clone().gain(1/255.0).bias(-0.5)
        ```
        
        .. note:: This will force the colorspace to 'float'
        """
        pixels = self.load().float().array()
        normalized = gain*pixels + bias
        return self.array(normalized).colorspace('float')

    def additive_noise(self, hue=(-15,15), saturation=(-15,15), brightness=(-15,15)):
        """Add uniform random noise to each HSV channel, with the image converted to HSV first.

        Args:
            hue [tuple]: The (low, high) range of the noise added to the hue channel
            saturation [tuple]: The (low, high) range of the noise added to the saturation channel
            brightness [tuple]: The (low, high) range of the noise added to the third (value) channel

        Returns:
            The result of setting the noisy uint8 buffer, clipped to [0,255], with array()
        """
        assert isinstance(hue, tuple) and len(hue) == 2 and hue[1]>=hue[0]
        assert isinstance(saturation, tuple) and len(saturation) == 2 and saturation[1]>=saturation[0]
        assert isinstance(brightness, tuple) and len(brightness) == 2 and brightness[1]>=brightness[0]        

        (H,W,C) = (self.height(), self.width(), self.channels())
        # one uniform noise plane per channel, drawn in (hue, saturation, brightness) order
        planes = [(hi - lo) * np.random.rand(H, W) + lo for (lo, hi) in (hue, saturation, brightness)]
        noisy = self.hsv().array() + np.dstack(planes)
        return self.array(np.clip(noisy, 0, 255).astype(np.uint8))
            
    # Image statistics
    def stats(self):
        """Log a summary of this image: representation, channels, shape, min, max, mean and per-channel mean"""
        log.info(self)
        log.info('  Channels: %d' % self.channels())
        # each statistic is computed immediately before it is logged
        for (fmt, statistic) in (('  Shape: %s', self.shape),
                                 ('  min: %s', self.min),
                                 ('  max: %s', self.max),
                                 ('  mean: %s', self.mean),
                                 ('  channel mean: %s', self.meanchannel)):
            log.info(fmt % str(statistic()))
    
    def min(self):
        """Return the minimum pixel value over all channels.  Alias for minpixel()"""
        smallest = self.minpixel()
        return smallest

    def minpixel(self):
        """Return the minimum value over all elements of the loaded image buffer"""
        flat = self.load().array().flatten()
        return np.min(flat)
    
    def max(self):
        """Return the maximum pixel value over all channels.  Alias for maxpixel()"""
        largest = self.maxpixel()
        return largest

    def maxpixel(self):
        """Return the maximum value over all elements of the loaded image buffer"""
        flat = self.load().array().flatten()
        return np.max(flat)
    
    def mean(self):
        """Return the mean value over all elements of the loaded image buffer"""
        flat = self.load().array().flatten()
        return np.mean(flat)

    def meanchannel(self, k=None):
        """Return the mean over rows and columns of the loaded image buffer, per channel.

        Args:
            k [int]: Optional channel index.  If provided, return only the mean for that channel.

        Returns:
            A flattened array of channel means, or the single mean for channel k
        """
        per_channel = np.mean(self.load().array(), axis=(0, 1)).flatten()
        if k is None:
            return per_channel
        return per_channel[k]
    
    def sum(self):
        """Return the sum over all elements of the loaded image buffer"""
        flat = self.load().array().flatten()
        return np.sum(flat)

    # Image visualization
    def closeall(self):
        """Close every open figure window via `vipy.show.closeall` and return this image"""
        vipy.show.closeall()
        return self
    
    def close(self, fignum=None):
        """Close the requested figure number, or close all figures if fignum=None.

        Args:
            fignum: Optional figure number to close

        Returns:
            This image object
        """
        if fignum is not None:
            vipy.show.close(fignum)
            return self
        return self.closeall()
    
    def show(self, figure=1, nowindow=False, timestamp=None, mutator=None, theme='dark'):
        """Display the image in the provided figure number and return this image.

        A clone of this image is converted to RGB for display, so this image is not modified.

        Args:
            figure: The figure number to draw into
            nowindow [bool]: Passed through to `vipy.show.imshow`
            timestamp: Optional timestamp passed through to `vipy.show.imshow`
            mutator: Optional callable applied to the clone before display, which must return the image to show
            theme [str]: 'dark' selects white timestamp text on black, any other value selects black text on white

        Returns:
            This image object
        """
        assert self.load().isloaded(), 'Image not loaded'
        (facecolor, textcolor) = ('black', 'white') if theme=='dark' else ('white', 'black')
        im = self.clone()
        if mutator:
            im = mutator(im)
        vipy.show.imshow(im.rgb().numpy(), fignum=figure, nowindow=nowindow, timestamp=timestamp, timestampfacecolor=facecolor, flush=True, timestampcolor=textcolor)
        return self

    def save(self, filename=None, quality=75):
        """Save the current image to a new filename and return the image object.  Resets edit history.

        Args:
            filename [str]: The output filename.  If None, a temporary JPEG filename is used.
            quality [int]: The quality setting passed through to saveas()

        Returns:
            This image object with its filename set to the saved file, the loader cleared and the array flushed
        """
        target = tempjpg() if filename is None else filename
        saved = self.saveas(target, quality=quality)
        return self.filename(saved).loader(None).flush_array()
        
        
    # Image export
    def pkl(self, pklfile=None):
        """Save this object to a pickle file and return the object, useful for intermediate saving in long fluent chains.

        Args:
            pklfile [str]: The output pickle filename.  If None, use the image filename with the extension replaced by '.pkl'.

        Returns:
            This image object
        """
        assert pklfile is not None or self.filename() is not None
        if pklfile is None:
            pklfile = toextension(self.filename(), '.pkl')
        remkdir(vipy.util.filepath(pklfile))
        vipy.util.save(self, pklfile)
        return self

    def pklif(self, b, pklfile=None):
        """Save this object to the provided pickle file only if b=True, useful for conditional saving in long fluent chains.

        Args:
            b [bool]: Whether to save
            pklfile [str]: The pickle filename passed through to pkl()

        Returns:
            The result of pkl() if b is True, otherwise this image object
        """
        assert isinstance(b, bool)
        if not b:
            return self
        return self.pkl(pklfile)

    def saveas(self, filename=None, writeas=None, quality=75):
        """Save the current buffer (not including drawing overlays) to a new filename and return the filename.

        Args:
            filename [str]: The output filename.  If None, a temporary JPEG filename is used.
            writeas: Passed through to `vipy.util.imwrite` for non-greyscale images
            quality [int]: The quality setting passed through to the image writer

        Returns:
            The filename that was written

        Raises:
            ValueError: If the colorspace is 'float', which must be converted to RGB or gray before saving
        """
        filename = tempjpg() if filename is None else filename
        # greyscale is tagged 'grey' by the colorspace conversion, so both spellings must select the greyscale writer
        if self.colorspace() in ['gray', 'grey']:
            imwritegray(self.grayscale()._array, filename, quality=quality)
        elif self.colorspace() != 'float':
            imwrite(self.load().array(), filename, writeas=writeas, quality=quality)
        else:
            raise ValueError('Convert float image to RGB or gray first. Try self.mat2gray()')
        return filename

    def saveastmp(self):
        """Save the current buffer to a new temporary JPEG filename and return the filename.  Alias for savetmp()"""
        tmpfile = tempjpg()
        return self.saveas(tmpfile)

    def savetmp(self):
        """Save the current buffer to a new temporary JPEG filename and return the filename.  Alias for saveastmp()"""
        tmpfile = self.saveastmp()
        return tmpfile

    def tocache(self):
        """Save the current buffer to a temporary JPEG filename mapped into the VIPY cache by `vipy.util.tocache`, and return the filename"""
        cachefile = vipy.util.tocache(tempjpg())
        return self.saveas(cachefile)
    
    def base64(self):
        """Export a base64 encoding of the image as JPEG bytes, suitable for embedding in an html page.

        A clone of this image is converted to RGB for encoding, so this image is not modified.

        Returns:
            The base64 encoded JPEG as a bytes object
        """
        stream = io.BytesIO()
        rgb = self.clone().rgb()
        rgb.pil().save(stream, format='JPEG')
        jpeg = stream.getvalue()
        return base64.b64encode(jpeg)
        
    def ascii(self):
        """Export the base64 encoding of the image as an ascii string, suitable for embedding in an <img> tag"""
        encoded = self.base64()
        return encoded.decode('ascii')

    def html(self, alt=None, id=None, attributes={'loading':'lazy'}):
        """Export a base64 encoding of the image enclosed in an html <img> tag.

           Args:
              alt [str]: The alt text, defaults to the image filename
              id [str]: The element id, defaults to the image filename.  The id attribute is omitted if both are None.
              attributes [dict]: Extra attributes rendered as key="value" pairs at the end of the tag.  This is read but not modified.
           
           Returns:
              -string:  <img src="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" loading="lazy"> containing base64 encoded JPEG and alt text with lazy loading
        """
        assert isinstance(attributes, dict)
        encoded = self.base64().decode('ascii')
        alt_text = self.filename() if alt is None else alt
        if id is None:
            id = self.filename()
        id_attr = '' if id is None else ('id="%s" ' % id)
        extra = ' '.join('%s="%s"' % (str(k),str(v)) for (k,v) in attributes.items())
        return '<img %ssrc="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" %s>' % (id_attr, encoded, str(alt_text), extra)

    def annotate(self, timestamp=None, mutator=None, theme='dark'):
        """Render this image with its annotations via savefig() and return the rendered pixels as a new RGB `vipy.image.Image`.

        Args:
            timestamp: Passed through to savefig()
            mutator: Passed through to savefig()
            theme [str]: Passed through to savefig()

        Returns:
            A new `vipy.image.Image` object in 'rgb' colorspace containing the rendered figure
        """
        # FIXME: for k in range(0,10): self.annotate().show(figure=k), this will result in cumulative figures
        rendered = self.savefig(timestamp=timestamp, theme=theme, mutator=mutator)
        pixels = rendered.rgb().array()
        return vipy.image.Image(array=pixels, colorspace='rgb')

    def savefig(self, filename=None, figure=1, timestamp=None, theme='dark', mutator=None):
        """Render this image with drawing overlays using show(), and return the rendered figure as an RGBA image.

        Args:
            filename [str]: Optional output filename.  If provided, the rendered figure is also saved as RGB to this path.
            figure: The figure number used for rendering.  This figure is closed before returning.
            timestamp: Passed through to show()
            theme [str]: Passed through to show()
            mutator: Passed through to show()

        Returns:
            A new `vipy.image.Image` containing the rendered figure.  The image is in 'rgba' colorspace, or converted in place to 'rgb' if filename is provided.
        """
        self.show(figure=figure, nowindow=True, timestamp=timestamp, theme=theme, mutator=mutator)  # sets figure dimensions, does not display window
        canvas = plt.figure(figure).canvas  # read back the figure that was just drawn, not a hard-coded figure number
        (W,H) = canvas.get_width_height()  # fast
        buf = io.BytesIO()
        canvas.print_raw(buf)  # fast
        img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))  # RGBA
        vipy.show.close(figure)
        t = vipy.image.Image(array=img, colorspace='rgba')
        if filename is not None:
            t.rgb().saveas(os.path.abspath(os.path.expanduser(filename)))
        return t

    def map(self, func):
        """Apply a function to the numpy image buffer, such that newimg=func(img), and replace the buffer with newimg.

        Args:
            func: A function taking the current numpy array and returning a numpy array

        Returns:
            This image object.  If the dtype or shape of the buffer changed, the colorspace is set to the generic 'float'.
        """
        assert isinstance(func, types.LambdaType), "Input must be lambda function (e.g. f = lambda img: 255.0-img)"
        before = self.array()  # reference to the current buffer
        after = func(self.array())  # may operate in-place
        assert isnumpy(after), "Lambda function output must be numpy array"
        self.array(after)
        unchanged = (after.dtype == before.dtype and after.shape == before.shape)
        if not unchanged:
            self.colorspace('float')  # unknown colorspace after transformation, set generic
        return self

    def perceptualhash(self, bits=128, asbinary=False, asbytes=False):
        """Perceptual differential hash function

        This function clones the image, resizes it to a small square based on the desired bit encoding, converts to greyscale,
        then encodes the signs of the vertical and horizontal gradients.
        
        Args:
            bits: [int]  longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
            asbinary: [bool] If true, return a binary array
            asbytes: [bool] if true return a byte array.  This takes precedence over asbinary.

        Returns:
            A hash string encoding the perceptual hash such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
            asbytes: a bytes array
            asbinary: a numpy binary array            

        .. notes::
            - Can be used for near duplicate detection by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.  Equivalently, `vipy.image.Image.perceptualhash_distance`.
            - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex(h))
        """        
        allowablebits = [2*k*k for k in range(2, 17)]
        assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
        side = int(np.ceil(np.sqrt(bits/2.0)))
        small = self.clone().resize(cols=side+1, rows=side+1).greyscale().numpy()
        gradients = np.dstack(np.gradient(small))
        signs = (gradients[0:-1, 0:-1] > 0).flatten()  # drop the last row and column, leaving side*side*2 bits
        if asbytes:
            return bytes(np.packbits(signs))
        if asbinary:
            return signs
        return bytes(np.packbits(signs)).hex()

    @staticmethod
    def perceptualhash_distance(h1, h2):
        """Hamming distance between two perceptual hashes"""
        assert len(h1) == len(h2)
        return np.sum(np.unpackbits(bytearray().fromhex(h1)) != np.unpackbits(bytearray().fromhex(h2)))
    

    def rot90cw(self):
        """Rotate the image buffer 90 degrees clockwise and return this image"""
        rotated = np.rot90(self.numpy(), k=3)  # three counterclockwise quarter turns
        self.array(rotated)
        return self

    def rot90ccw(self):
        """Rotate the image buffer 90 degrees counterclockwise and return this image"""
        rotated = np.rot90(self.numpy(), k=1)
        self.array(rotated)
        return self

    def face_detection(self, mindim=256,  conf=0.2):
        """Detect faces in the scene, add as objects, return new scene with just faces
        
        Args:
            mindim [int]: The minimum dimension for downsampling the image for face detection.  Will be upsampled back to native resolution prior to return
            conf [float]: Accepted for symmetry with `vipy.image.Image.person_detection`, but not currently forwarded to the detector

        Returns
            A `vipy.image.Scene` object with all detected faces or the union of faces and all objects in self

        .. note:: This method uses a CPU-only pretrained face detector.  This is convenient, but slow.  See the heyvi package for optimized GPU batch processing for faster operation.
        """
        try_import('heyvi')
        import heyvi
        assert heyvi.version.is_at_least('0.3.28')
        detector = heyvi.detection.FaceDetector()
        scene = Scene.cast(self.clone()).clear().mindim(mindim)  # detect on a downsampled clone with existing objects removed
        return detector(scene).flush()
    
    def person_detection(self, mindim=256, conf=0.2):
        """Detect only people in the scene, add as objects, return new scene with just people

        Args:
            mindim [int]: The minimum dimension for downsampling the image for person detection.  Will be upsampled back to native resolution prior to return
            conf [float]: A real value between [0,1] of the minimum confidence for person detection

        Returns
            A `vipy.image.Scene` object with all detected people or the union of people and all objects in self
        
        .. note:: This method uses a CPU-only pretrained person detector.  This is convenient, but slow.  See the heyvi package for optimized GPU batch processing for faster operation.
        """
        try_import('heyvi')
        import heyvi
        assert heyvi.version.is_at_least('0.3.28')
        detector = heyvi.detection.ObjectDetector()
        scene = Scene.cast(self.clone()).clear().mindim(mindim)  # detect on a downsampled clone with existing objects removed
        return detector(scene, conf=conf, objects=['person']).flush()

    def face_blur(self, radius=4, mindim=256):
        """Replace pixels for all detected faces with `vipy.image.Scene.blurmask`, add locations of detected faces into attributes.

        Args:
            radius [int]: The radius of pixels for `vipy.image.Scene.blurmask`
            mindim [int]: The minimum dimension for downsampling the image for face detection.  Will be upsampled prior to blurring.
        
        Returns:
            A `vipy.image.Image` object with a pixel buffer with all faces blurred, with the 'face_blur' attribute set to the locations of the blurred faces.

        .. notes::
            - This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.  
            - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.  
            - To retain boxes, use self.face_detection().blurmask()
        """
        faces = self.face_detection(mindim=mindim)  # only faces
        locations = [o.int().json(encode=False) for o in faces.objects()]
        blurred = faces.setattribute('face_blur', locations).blurmask(radius=radius)
        return blurred.downcast()

    def face_pixelize(self, radius=7, mindim=256):
        """Replace pixels for all detected faces with `vipy.image.Scene.pixelize`, add locations of detected faces into attributes.

        Args:
            radius [int]: The radius of pixels for `vipy.image.Scene.pixelize`
            mindim [int]: The minimum dimension for downsampling the image for face detection.  Will be upsampled prior to pixelize.
        
        Returns:
            A `vipy.image.Image` object with a pixel buffer with all faces pixelized, with the 'face_pixelize' attribute set to the locations of the pixelized faces.

        .. notes::
            - This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.  
            - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.  
            - To retain boxes, use self.face_detection().pixelize()
        """
        faces = self.face_detection(mindim=mindim)  # only faces
        locations = [o.int().json(encode=False) for o in faces.objects()]
        pixelized = faces.setattribute('face_pixelize', locations).pixelize(radius=radius)
        return pixelized.downcast()


    def viewport(self):
        """Return the bounding box of the current loaded pixels in the original filename/url/buffer.

        This starts from the current image rectangle and replays the recorded history of geometric transformations
        in reverse order on the box, to recover the bounding box of the pixels in array().

        This is useful to specify a region of a larger image that was zoomed in for processing.
        
        To show this viewport as a bounding box:

        >>> im = vipy.image.vehicles().centercrop(100,100)
        >>> viewport = vipy.object.Detection.cast(im.viewport())
        >>> im.flush().append(viewport).show()
        """
        box = self.imagebox()
        if self._history() is None:
            return box
        for (method, kwargs) in reversed(self._history()):
            # each history entry is (box method name, keyword arguments), applied for its effect on the box
            getattr(box, method)(**kwargs)
        return box

    def padcrop(self, bbox):
        """Crop the image buffer using the supplied bounding box, zero padding first if the box extends outside the image rectangle.

        Args:
            bbox [`vipy.geometry.BoundingBox`]: The region to crop, which is translated by the applied padding

        Returns:
            The result of cropping the zeropadded image with the translated box
        """
        overhang_x = max(0-bbox.xmin(), bbox.xmax()-self.width())  # largest horizontal overhang past either edge
        dx = int(max(0, overhang_x))
        overhang_y = max(0-bbox.ymin(), bbox.ymax()-self.height())  # largest vertical overhang past either edge
        dy = int(max(0, overhang_y))
        padded = self.zeropad(dx,dy)
        return padded._crop(bbox.translate(dx=dx, dy=dy))
    
    def recenter(self, p):
        """Recenter the image so that point p=(x=col, y=row) in the current image is in the middle of the new image, zeropad to (width, height).  

           This is useful to implement a 'saccade', under the small angle assumption, where a rotation is approximated by a translation
        """        
        window = self.imagebox().centroid(p)  # image sized box, centered on p
        return self.padcrop(window)

    
class Labeled(Image):
    """Marker base class for images that carry some form of annotation.

    This class adds no behavior to `vipy.image.Image`.  It exists so that isinstance() can distinguish an image with any annotation from a completely unlabeled image.

    >>> im = vipy.image.owl()
    >>> assert isinstance(im, vipy.image.Labeled)
    >>> im = vipy.image.RandomImage()
    >>> assert not isinstance(im, vipy.image.Labeled)    

    The specific form of annotation may be `vipy.image.ImageCategory`, `vipy.image.TaggedImage` or `vipy.image.Scene`, but all are `vipy.image.Labeled` 
    """


class ImageCategory(Labeled):
    """vipy ImageCategory class

    This class provides a representation of a vipy.image.Image with a category label. 

    Valid constructors include all provided by vipy.image.Image with the additional kwarg 'category' (or alias 'label') and optional confidence

    ```python
    im = vipy.image.ImageCategory(filename='/path/to/dog_image.ext', category='dog')
    im = vipy.image.ImageCategory(url='http://path/to/dog_image.ext', category='dog')
    im = vipy.image.ImageCategory(array=dog_img, colorspace='rgb', category='dog')
    ```

    The category is stored in the 'category' attribute and the optional confidence in the 'confidence' attribute.
    """

    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')    
    def __init__(self, filename=None, url=None, category=None, label=None, attributes=None, array=None, colorspace=None, confidence=None):
        """Construct an image with a single category label.

        Args:
            filename, url, attributes, array, colorspace: Forwarded unchanged to the parent constructor
            category: The category label for this image
            label: Alias for category, used only if category is None
            confidence: Optional confidence, stored as a float in the 'confidence' attribute
        """
        # Image class inheritance
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)

        # 'label' is the documented alias for 'category', an explicit category takes precedence
        category = category if category is not None else label

        # Do not overwrite a category that arrived through 'attributes' (e.g. from_json) when none is given explicitly
        if category is not None or not self.hasattribute('category'):
            self.set_attribute('category', category)
        if confidence is not None:
            self.set_attribute('confidence', float(confidence))

    def __repr__(self):
        """Return the parent representation renamed to ImageCategory, with category and optional confidence appended"""
        fields = ['category=%s' % str(self.category())]
        fields +=  ['confidence=%1.3f' % self.confidence()] if self.confidence() is not None else []
        return super().__repr__().replace('vipy.image.Image', 'vipy.image.ImageCategory').replace('>', ', %s>' % ','.join(fields))

    def __eq__(self, other):
        """Two ImageCategory objects are equal if their categories are equal, any other type is unequal"""
        return self.category() == other.category() if isinstance(other, ImageCategory) else False

    def __ne__(self, other):
        """Negation of __eq__"""
        return self.category() != other.category() if isinstance(other, ImageCategory) else True

    @classmethod
    def from_json(cls, s):
        """Construct an object of this class from a JSON string or an already decoded dictionary.

        The category and confidence are not passed explicitly since they are carried by the 'attributes' entry.
        """
        d = json.loads(s) if not isinstance(s, dict) else s
        return cls(filename=d.get('filename'),
                   url=d.get('url'),
                   category=None,  # will be in attributes
                   confidence=None,  # will be in attributes
                   attributes=d.get('attributes'),
                   colorspace=d.get('colorspace'),
                   array=np.array(d['array'], dtype=np.uint8) if d.get('array') is not None else None)
    
    def new_category(self, c):
        """Replace the category with c, returns the result of set_attribute"""
        return self.set_attribute('category', c)

    def clear_category(self):
        """Remove the 'category' attribute if present, returns self"""
        if 'category' in self.attributes:
            del self.attributes['category']
        return self
    
    def category(self):
        """Return the 'category' attribute, or None if it is not set"""
        return self.attributes['category'] if 'category' in self.attributes else None

    def confidence(self):
        """Return the 'confidence' attribute as provided by get_attribute"""
        return self.get_attribute('confidence')        

    def tags(self, tags=None):
        """With an argument, set the category to the first element of tags.  Without, return the category as a tuple of length one, or an empty tuple if there is no category"""
        if tags is not None:
            return self.set_attribute('category', tolist(tags)[0])                
        return (self.category(), ) if self.category() is not None else ()

    
class TaggedImage(Labeled):
    """vipy.image.TaggedImage class

    This class provides a representation of a vipy.image.Image with one or more tags.

    Valid constructors include all provided by vipy.image.Image with additional labels that provide ground truth for the content of the image. 

    ```python
    im = vipy.image.TaggedImage(filename='/path/to/dog.jpg', tags={'dog','canine'})
    ```

    Tags are stored in the 'tags' attribute, per-tag confidences in the 'confidences' attribute and captions in the 'captions' attribute.
    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')        
    def __init__(self, filename=None, url=None, attributes=None, array=None, colorspace=None, tags=None, category=None, confidence=None, caption=None):
        """Construct a tagged image.

        Args:
            filename, url, attributes, array, colorspace: Forwarded unchanged to the parent constructor
            tags: Optional tag or collection of tags, converted with tolist
            category: Optional category, stored as the first tag
            confidence: NOTE(review): accepted in the signature but not used by this constructor
            caption: Optional caption or list of captions
        """
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)
        
        # The category (if any) is always the first tag.  Tags are only written if there is something to write, so that tags arriving in 'attributes' survive
        tags = ([category] if category is not None else []) + (tolist(tags) if tags is not None else [])
        if len(tags) > 0:
            self.set_attribute('tags', tags)
        if caption is not None:
            self.captions(caption)
            
    def __repr__(self):
        """Return the parent representation renamed to TaggedImage, with category, caption, confidence or tags appended"""
        fields  = ['category=%s' % self.category()] if len(self.tags())==1 else []
        fields += ['caption=%s' % truncate_string(self.caption(), 40)] if self.caption() is not None else []        
        fields +=  ['confidence=%1.3f' % self.confidence()] if len(self.tags())==1 and self.confidence() is not None else []
        fields +=  ['tags=%s' % truncate_string(str(self.tags()), 40)] if len(self.tags())>1 else []
        return super().__repr__().replace('vipy.image.Image', 'vipy.image.TaggedImage').replace('>', ', %s>' % ', '.join(fields))
        

    @classmethod
    def from_json(cls, s):
        """Construct an object of this class from a JSON string or an already decoded dictionary.  Tags are carried by the 'attributes' entry"""
        d = json.loads(s) if not isinstance(s, dict) else s
        return cls(filename=d['filename'] if 'filename' in d else None,
                   url=d['url'] if 'url' in d else None,
                   category=None,  # will be in attribute
                   tags=None,      # will be in attributes
                   attributes=d['attributes'] if 'attributes' in d else None,
                   colorspace=d['colorspace'] if 'colorspace' in d else None,
                   array=np.array(d['array'], dtype=np.uint8) if 'array' in d and d['array'] is not None else None)

    def category(self):
        """Return the first image tag, or None if there are no tags (missing or empty tag list)"""
        # Read the attribute directly rather than self.tags(), which subclasses may extend
        tags = self.attributes['tags'] if 'tags' in self.attributes else None
        return tags[0] if tags else None

    def new_category(self, c):
        """Replace all tags with the single tag c and drop the stored confidences, returns self"""
        self.attributes['tags'] = [c]
        self.del_attribute('confidences')
        return self
        
    def confidence(self, tag=None, default=None):
        """Return the confidence stored for tag (or for the category if tag is None), else default"""
        t = tag if tag is not None else self.category()
        return self.get_attribute('confidences')[t] if self.hasattribute('confidences') and t in self.attributes['confidences'] else default

    def has_tag(self, t):
        """Return True if t is in tags()"""
        return t in self.tags()
    
    def tags(self, tags=None):
        """With an argument, replace the tag list.  Without, return the tag list, or an empty list if there are no tags"""
        if tags is not None:
            return self.set_attribute('tags', tolist(tags))        
        return self.attributes['tags'] if 'tags' in self.attributes else []
    
    def add_tag(self, tag, confidence=None):
        """Append tag to the tag list, and record its confidence if one is provided, returns self"""
        self.append_attribute('tags', tag)
        if confidence is not None:
            if not self.hasattribute('confidences'):
                self.set_attribute('confidences', {})
            self.attributes['confidences'][tag] = confidence
        return self

    def add_caption(self, caption):
        """Append caption to the caption list, returns self"""
        self.append_attribute('captions', caption)
        return self
    
    def caption(self):
        """Return the first caption, or None if the 'captions' attribute is not set"""
        return self.get_attribute('captions')[0] if self.hasattribute('captions') else None
    
    def captions(self, captions=None):
        """With an argument, replace the caption list.  Without, return the caption list, or an empty list if there are no captions"""
        if captions is not None:
            return self.set_attribute('captions', tolist(captions))
        return self.get_attribute('captions') if self.hasattribute('captions') else []
    
    def add_tags(self, tags, confidences=()):
        """Add each tag with its corresponding confidence.  Tags without a corresponding confidence are added with no confidence, returns self"""
        for (t,c) in zip_longest(tags, confidences):
            self.add_tag(t, c)
        return self

    def clear_tags(self):        
        """Replace the tag list with an empty list and remove stored confidences, returns self"""
        self.set_attribute('tags',[])
        if 'confidences' in self.attributes:
            del self.attributes['confidences']
        return self
    
    def add_soft_tags(self, soft_tags):
        """Soft tags are a list of (tag, confidence) tuples"""
        for (t,c) in soft_tags:
            self.add_tag(t, c)
        return self

    def add_soft_tag(self, soft_tag):
        """A soft tag is a tuple of (tag, confidence)"""
        return self.add_tag(*soft_tag)
    
    def soft_tags(self):
        """Soft tags are a list of (tag, confidence) tuples.  Will return only those tags with associated confidences.  Will return empty tuple if there are tags but no confidences"""
        return tuple((t, self.attributes['confidences'].get(t)) for t in self.tags() if 'confidences' in self.attributes and self.attributes['confidences'].get(t) is not None)

    def has_soft_tags(self):
        """Return true if there exist a confidence for any tag"""
        return len(self.soft_tags())>0

    
class Scene(TaggedImage):
    """vipy.image.Scene class

    This class provides a representation of a vipy.image.TaggedImage with one or more vipy.object.Object.  The goal of this class is to provide a unified representation for all objects in a scene.

    Valid constructors include all provided by vipy.image.Image() and vipy.image.ImageCategory() with the additional kwarg 'objects', which is a list of vipy.object.Object()

    ```python
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city', objects=[vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city').objects([vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[0,0,100,100])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[[0,0,100,100], [100,100,200,200]])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels=['face', 'desk'], xywh=[[0,0,100,100], [200,200,300,300]])
    ```

    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes', '_objectlist')
    
    def __init__(self, filename=None, url=None, category=None, attributes=None, objects=None, xywh=None, boxlabels=None, array=None, colorspace=None, tags=None):
        """Construct a scene from an image source, optional image level tags and optional objects.

        Args:
            filename, url, attributes, array, colorspace, tags, category: Forwarded unchanged to the parent constructor
            objects: Optional list of vipy.object.Object
            xywh: Optional single box (x, y, w, h) or list/tuple of such boxes, each converted to a vipy.object.Detection
            boxlabels: Optional single string applied to every xywh box, or a list/tuple with one label per xywh box

        Raises:
            ValueError: If objects, xywh or boxlabels are malformed
        """
        super().__init__(filename=filename, url=url, attributes=attributes, tags=tags, category=category, array=array, colorspace=colorspace)  
        self._objectlist = []

        if objects is not None:
            if not (isinstance(objects, list) and all([isinstance(bb, vipy.object.Object) for bb in objects])):
                raise ValueError("Invalid object list - Input must be [vipy.object.Object, ...]")
            self._objectlist = objects

        detlist = []
        if xywh is not None:
            if (islistoflists(xywh) or istupleoftuples(xywh)) and all([len(bb)==4 for bb in xywh]):
                detlist = [vipy.object.Detection(category=None, xywh=bb) for bb in xywh]
            elif (islist(xywh) or isinstance(xywh, tuple)) and len(xywh)==4 and all([isnumber(bb) for bb in xywh]):
                detlist = [vipy.object.Detection(category=None, xywh=xywh)]
            else:
                raise ValueError("Invalid xywh list - Input must be [[x1,y1,w1,h1], ...")            
        if boxlabels is not None:
            if isstring(boxlabels):
                detlist = [d.new_category(boxlabels) for d in detlist]
            elif (isinstance(boxlabels, tuple) or islist(boxlabels)) and len(boxlabels) == len(detlist):
                # Compare against the boxes actually built: xywh may be None, or a single flat box of length four
                detlist = [d.new_category(label) for (d,label) in zip(detlist, boxlabels)]
            else:
                raise ValueError("Invalid boxlabels list - len(boxlabels) must be len(xywh) with corresponding labels for each xywh box  [label1, label2, ...]")

        # Concatenation builds a new list, explicit objects come first followed by the xywh detections
        self._objectlist = self._objectlist + detlist

        
    @classmethod
    def cast(cls, im):
        """Return im unchanged if its class is exactly vipy.image.Scene, otherwise construct a new object of this class from the filename, url, attributes, array, colorspace and loader of im"""
        assert isinstance(im, vipy.image.Image), "Invalid input - must be derived from vipy.image.Image"
        if im.__class__ == vipy.image.Scene:
            return im
        scene = cls(filename=im._filename,
                    url=im._url,
                    attributes=im.attributes,
                    array=im._array,
                    colorspace=im._colorspace)
        return scene.loader(*im._loader)
    
    @classmethod
    def from_json(cls, s):
        """Construct a scene from a JSON string or an already decoded dictionary.

        The image level fields are restored by the parent from_json.  The 'objectlist' entry is then decoded either
        as a dictionary keyed by object type ('Detection', 'Keypoint2d') or, for legacy serializations, as a flat list of detections.
        A missing 'objectlist' entry yields a scene with no objects.
        """
        im = super().from_json(s)
        im.__class__ = vipy.image.Scene
        d = {k.lstrip('_'):v for (k,v) in (json.loads(s) if not isinstance(s, dict) else s).items()}  # prettyjson (remove "_" prefix to attributes)
        objectlist = d.get('objectlist') or []  # tolerate a missing or null entry
        if isinstance(objectlist, dict):
            # Version 1.15.1: expanded serialization to support multiple object types
            im._objectlist = [vipy.object.Detection.from_json(o) for o in objectlist.get('Detection', [])]
            im._objectlist += [vipy.object.Keypoint2d.from_json(o) for o in objectlist.get('Keypoint2d', [])]
        else:
            # Legacy support: 1.14.4
            im._objectlist = [vipy.object.Detection.from_json(o) for o in objectlist]
        return im

    def __json__(self):
        """Hook for the json package: return this scene serialized with json(encode=True)"""
        encoded = self.json(encode=True)
        return encoded

    def num_objects(self):
        return len(self._objectlist)
    
    def json(self, encode=True):
        d = {k.lstrip('_'):getattr(self, k) for k in Scene.__slots__ if getattr(self, k) is not None}  # prettyjson (remove "_" prefix to attributes)          
        d['objectlist'] = {'Detection': [bb.json(encode=False) for bb in self._objectlist if isinstance(bb, vipy.object.Detection)],
                           'Keypoint2d': [p.json(encode=False) for p in self._objectlist if isinstance(p, vipy.object.Keypoint2d)]}
        d['objectlist'] = {k:v for (k,v) in  d['objectlist'].items() if len(v) > 0}  # cleanup empty lists
        if 'attributes' in d and len(d['attributes'])==0:  # cleanup empty attributes
            del d['attributes']  # will be recreated in from_json
        if 'array' in d and d['array'] is not None:
            if self.hasfilename() or self.hasurl():
                log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() or self.save(), then reload the image from backing filename/url after json import')            
            d['array'] = self._array.tolist()        
        return json.dumps(d) if encode else d

        
    def __eq__(self, other):
        """Scene equality requires equality of all objects in the scene, assumes a total order of objects"""
        return isinstance(other, Scene) and len(self)==len(other) and all([obj1 == obj2 for (obj1, obj2) in zip(self, other)])

    def __repr__(self):
        """Summarize the scene: image size and colorspace if loaded, backing filename and url, image level category or tags, and the number of objects"""
        parts = []
        if self.isloaded():
            parts.append("height=%d, width=%d, color=%s" % (self.height(), self.width(), self.colorspace()))
        elif self.has_loader():
            parts.append('loaded=False')
        if self.filename() is not None:
            parts.append('filename=%s' % (self.filename()))
        if self.hasurl():
            parts.append('url=%s' % self.url())

        num_image_tags = len(self.image_tags())
        if num_image_tags == 1:
            parts.append('category=%s' % truncate_string(str(self.category()), 40))
        elif num_image_tags > 1:
            parts.append('tags=%s' % truncate_string(str(self.image_tags()), 40))

        num_objects = len(self.objects())
        if num_objects > 0:
            parts.append('objects=%d' % num_objects)

        return str('<vipy.image.Scene: %s>' % (', '.join(parts)))

    def __len__(self):
        """The length of a scene is equal to the number of objects present in the scene"""
        return len(self._objectlist)

    def __iter__(self):
        """Yield one single-object scene per object, in object list order, as returned by indexing this scene"""
        for (index, _) in enumerate(self._objectlist):
            yield self[index]

    def __getitem__(self, k):
        """Return a shallow clone of this scene that contains only a clone of the kth object"""
        assert isinstance(k, int), "Indexing by object in scene must be integer"
        scene = self.clone(shallow=True)
        selected = self._objectlist[k].clone()
        return scene.objects([selected])

    def image_tags(self, tags=None):
        """Get (or set, if tags is provided) the image level tags only, delegating to the parent tags()"""
        image_level = super().tags(tags)
        return image_level
    
    def tags(self, tags=None):
        """Without an argument, return the image level tags followed by the unique object tags.  With an argument, set the image level tags"""
        if tags is None:
            return super().tags() + self.object_tags()
        return super().tags(tags)
    
    def load(self, verbose=False):
        """Load the image and convert objects with normalized coordinates to pixel coordinates.

        Normalized coordinates are in the range [0,1] relative to the (height, width), which is not known until load()
        """
        super().load(verbose=verbose)

        def to_pixels(o):
            # Scale by the loaded array (width, height) and drop the marker attribute, leave other objects untouched
            if not o.has_normalized_coordinates():
                return o
            return o.scale_x(self.array().shape[1]).scale_y(self.array().shape[0]).del_attribute('normalized_coordinates')

        if self.is_loaded() and self.num_objects() > 0 and any(o.has_normalized_coordinates() for o in self.objects()):
            self.objectmap(to_pixels)
        return self
    
    def split(self):
        """Return a list with one `vipy.image.Scene` per object in this scene, each containing exactly that object.
        
        .. note:: The pixel buffer is shared between each split.  Use [im.clone() for im in self.split()] for an explicit copy.
        """
        return [im for im in self]

    def split_and_recenter(self):
        """Return a list with one `vipy.image.Scene` per object, where each is a clone recentered on the centroid of its object bounding box
        
        .. note:: Each element is cloned from its split before recentering.
        """
        recentered = []
        for im in self.split():
            dup = im.clone()  # clone first, then compute the centroid from the split
            recentered.append(dup.recenter(im.boundingbox().centroid()))
        return recentered
    
    def append_object(self, imdet):
        """Add the provided vipy.object.Object to the end of the scene object list, returns self"""
        assert isinstance(imdet, vipy.object.Object), "Invalid input"
        self._objectlist.extend((imdet,))
        return self

    def add_object(self, imdet):
        """Alias for append_object"""
        result = self.append_object(imdet)
        return result
    
    def objects(self, objectlist=None):
        if objectlist is None:
            return self._objectlist
        else:
            assert isinstance(objectlist, list) and (len(objectlist) == 0 or all([isinstance(bb, vipy.object.Object) for bb in objectlist])), "Invalid object list"
            self._objectlist = objectlist
            return self

    def objectmap(self, f):
        """Apply lambda function f to each object.  If f is a list of lambda, apply one to one with the objects"""
        assert callable(f)
        self._objectlist = [f(obj)  for obj in self._objectlist] if not isinstance(f, list) else [g(obj) for (g,obj) in zip(f, self._objectlist)]
        assert all([isinstance(a, vipy.object.Object) for a in self.objects()]), "Lambda function must return vipy.object.Detection"
        return self

    def objectfilter(self, f):
        """Apply lambda function f to each object and keep if filter is True"""
        assert callable(f)
        self._objectlist = [obj for obj in self._objectlist if f(obj) is True]
        return self

    def nms(self, conf, iou, cover=0.8):
        """Replace objects() with the result of per-category non-maximum suppression using the confidence, IoU and cover thresholds, returns self"""
        survivors = vipy.object.non_maximum_suppression(self.objects(), conf=conf, iou=iou, cover=cover, bycategory=True)
        return self.objects(survivors)

    def intersection(self, other, miniou, bycategory=True):
        """Return a clone of this scene keeping only the objects that greedy assignment matches to an object in other with overlap miniou"""
        assert isinstance(other, Scene), "Invalid input"
        v = self.clone()
        assignment = greedy_assignment(v.objects(), other.objects(), miniou, bycategory=bycategory)
        matched = [v._objectlist[k] for (k, d) in enumerate(assignment) if d is not None]
        v._objectlist = matched
        return v

    def difference(self, other, miniou, bycategory=True):
        """Return a clone of this scene keeping only the objects that greedy assignment does not match to any object in other.

        Args:
            other: The `vipy.image.Scene` to compare against
            miniou: The overlap threshold forwarded to greedy_assignment
            bycategory: Forwarded to greedy_assignment, mirrors the parameter of intersection()
        """
        assert isinstance(other, Scene), "Invalid input"
        v = self.clone()
        assignment = greedy_assignment(self.objects(), other.objects(), miniou, bycategory=bycategory)
        v._objectlist = [v._objectlist[k] for (k,d) in enumerate(assignment) if d is None]
        return v
        
    def union(self, other, miniou=None):
        """Append the objects of other to this scene in place, returns self.

        Args:
            other: A `vipy.image.Image`.  Only a `vipy.image.Scene` contributes objects, any other image leaves this scene unchanged.
            miniou: If None, all objects of other are appended with no duplicate checking.  Otherwise, objects of other that
                    greedy assignment matches to an object already in this scene (by category, with overlap miniou) are skipped.
        """
        assert isinstance(other, Image)
        if isinstance(other, Scene):
            incoming = other.objects()
            if miniou is not None and len(incoming) > 0 and len(self.objects()) > 0:
                # The assignment is aligned with 'incoming': None marks an object with no match in this scene
                assignment = greedy_assignment(incoming, self.objects(), miniou, bycategory=True)
                incoming = [o for (o, d) in zip(incoming, assignment) if d is None]
            self.objects(self.objects()+incoming)
        return self

    def __or__(self, other):
        """Apply the parent | operator, then merge the objects of other into this scene with union()"""
        super().__or__(other)
        merged = self.union(other)
        return merged
    
    def uncrop(self, bb, shape):
        """Undo a previous crop(bb) with the supplied bb=BoundingBox() and zeropad to shape=(H,W), translating every object by the upper left corner of bb"""
        super().uncrop(bb, shape)

        def shift(o):
            return o.translate(bb.xmin(), bb.ymin())

        return self.objectmap(shift)
        
    def clear(self):
        """Remove all objects from this scene."""
        return self.objects([])
    
    def boundingbox(self):
        """Return the union of all object bounding boxes, or None if there are no objects.  The scene is loaded first to compensate for normalized coordinates"""
        boxes = [vipy.geometry.BoundingBox.cast(bb) for bb in self.load().objects()]
        if len(boxes) == 0:
            return None
        bb = boxes[0].clone()  # clone so that the union does not modify the first object
        if len(boxes) == 1:
            return bb
        return bb.union(boxes[1:])

    def object_tags(self):
        """Return list of unique object tags in scene"""
        return list(dict.fromkeys([t for o in self.objects() for t in o.tags()]))
    
    # Spatial transformation
    def _history(self, func=None, **kwargs):
        """Read or extend the undo history used by flush.

        Without func, return the '_history' attribute.  With func, append the pair (func, kwargs) to the '_history' attribute and return self.
        This is useful for remote processing of images at lower resolutions and square crops without passing around the image buffer
        """
        if func is None:
            return self.getattribute('_history')
        self.append_attribute('_history', (func, kwargs))
        return self

    def flush_array(self):
        """Flush without undoing the object transformation history, equivalent to flush(undo_history=False)"""
        keep_object_transforms = False
        return self.flush(undo_history=keep_object_transforms)
    
    def flush(self, undo_history=True):
        """Free the image buffer.  If undo_history is True, first replay the recorded history in reverse on every object to restore alignment with the reference image filename/url"""
        if undo_history:
            history = self._history()
            if history is not None:
                for (name, kw) in reversed(history):
                    # Each entry names an object method and its keyword arguments that undo one transformation
                    self.objectmap(lambda o: getattr(o, name)(**kw))
        self.delattribute('_history')
        return super().flush()
    
    def imclip(self):
        """Drop objects that have no overlap with the image and clip the remaining objects to the image rectangle, returns self"""
        clipped = []
        for o in self._objectlist:
            if o.hasoverlap(self.numpy()):
                clipped.append(o.imclip(self.numpy()))
        self._objectlist = clipped
        return self

    def rescale(self, scale=1, interp='bilinear'):
        """Rescale the image buffer and all objects by scale, and record the inverse scale in the undo history - Not idempotent"""
        im = super().rescale(scale, interp=interp)
        im._objectlist = [bb.rescale(scale) for bb in im._objectlist]
        im._history('rescale', s=1/scale)
        return im

    def resize(self, cols=None, rows=None, height=None, width=None, interp='bilinear'):
        """Resize the image buffer to (height=rows, width=cols) and scale all objects accordingly.

        cols is a synonym for width and rows is a synonym for height.  If only one dimension is given, the other is scaled isotropically.
        Objects are scaled and the inverse scales are recorded in the undo history before the buffer is resized.
        """
        assert cols is None or width is None, "Define either width or cols"
        assert rows is None or height is None, "Define either height or rows"
        if height is not None:
            rows = height
        if width is not None:
            cols = width
        assert not (cols is None and rows is None), "Invalid input"

        # Per-axis scale factors relative to the current image size, a missing axis copies the other one
        sx = None if cols is None else (float(cols) / self.width())
        sy = None if rows is None else (float(rows) / self.height())
        if sx is None:
            sx = sy
        if sy is None:
            sy = sx

        self._objectlist = [bb.scale_x(sx).scale_y(sy) for bb in self._objectlist]
        self._history('scale_x', s=1/sx)._history('scale_y', s=1/sy)
        if sx == sy:
            return super().rescale(sx, interp=interp)  # FIXME: if we call resize here, inheritance is screweed up
        return super().resize(cols, rows, interp=interp)

    def centersquare(self):
        """Crop the image with the parent centersquare(), then translate all objects by half of the removed rows and columns and record the inverse translation"""
        (H, W) = self.shape()  # size before the crop
        im = super().centersquare()
        dy = (H - im.height())/2.0
        dx = (W - im.width())/2.0
        im._objectlist = [bb.translate(-dx, -dy) for bb in im._objectlist]
        im._history('translate', dx=dx, dy=dy)
        return im
    
    def fliplr(self):
        """Mirror all objects and then the image buffer left to right, and record the flip in the undo history"""
        flipped = [bb.fliplr(self.numpy()) for bb in self._objectlist]
        self._objectlist = flipped
        self._history('fliplr', width=self.width())
        return super().fliplr()

    def flipud(self):
        """Mirror all objects and then the image buffer top to bottom, and record the flip in the undo history"""
        flipped = [bb.flipud(self.numpy()) for bb in self._objectlist]
        self._objectlist = flipped
        self._history('flipud', height=self.height())        
        return super().flipud()
    
    def dilate(self, s):
        """Dilate all bounding boxes by scale factor, dilated boxes may be outside image rectangle"""
        self._objectlist = [bb.dilate(s) for bb in self._objectlist]
        return self

    def zeropad(self, padwidth, padheight):
        """Zero pad the image with the parent zeropad(), then translate all objects by the leading pad and record the inverse translation.

        padwidth and padheight are either a scalar or a tuple of length two, in which case the first element is used as the object offset
        """
        im = super().zeropad(padwidth, padheight)
        (dx, dy) = (padwidth, padheight)
        if isinstance(padwidth, tuple) and len(padwidth) == 2:
            dx = padwidth[0]
        if isinstance(padheight, tuple) and len(padheight) == 2:
            dy = padheight[0]
        im._objectlist = [bb.translate(dx, dy) for bb in im._objectlist]
        im._history('translate', dx=-dx, dy=-dy)
        return im

    def meanpad(self, padwidth, padheight, mu=None):
        """Pad the image with the parent meanpad(), then translate all objects by the leading pad and record the inverse translation.

        padwidth and padheight are either a scalar or a tuple of length two, in which case the first element is used as the object offset
        """
        im = super().meanpad(padwidth, padheight, mu=mu)
        (dx, dy) = (padwidth, padheight)
        if isinstance(padwidth, tuple) and len(padwidth) == 2:
            dx = padwidth[0]
        if isinstance(padheight, tuple) and len(padheight) == 2:
            dy = padheight[0]
        im._objectlist = [bb.translate(dx, dy) for bb in im._objectlist]
        im._history('translate', dx=-dx, dy=-dy)
        return im

    def rot90cw(self):
        """Rotate the scene 90 degrees clockwise, update all objects and record the counterclockwise rotation in the undo history"""
        (H, W) = self.shape()  # shape before rotation
        rotated = np.rot90(self.numpy(), 3)
        self.array(rotated)
        self._objectlist = [bb.rot90cw(H, W) for bb in self._objectlist]
        self._history('rot90ccw', H=W, W=H)                
        return self

    def rot90ccw(self):
        """Rotate the scene 90 degrees counterclockwise, update all objects and record the clockwise rotation in the undo history"""
        (H, W) = self.shape()  # shape before rotation
        rotated = np.rot90(self.numpy(), 1)
        self.array(rotated)
        self._objectlist = [bb.rot90ccw(H, W) for bb in self._objectlist]
        self._history('rot90cw', H=W, W=H)                        
        return self

    def maxdim(self, dim=None, interp='bilinear'):
        """Without dim, return the maximum dimension of the image.  With dim, resize preserving aspect ratio so that the maximum dimension equals dim, updating all objects"""
        if dim is None:
            return max(self.shape())
        return super().maxdim(dim, interp=interp)  # will call self.rescale() which will update boxes

    def mindim(self, dim=None, interp='bilinear'):
        """Without dim, return the minimum dimension of the image.  With dim, resize preserving aspect ratio so that the minimum dimension equals dim, updating all objects"""
        if dim is None:
            return min(self.shape())
        return super().mindim(dim, interp=interp)  # will call self.rescale() which will update boxes

    def crop(self, bbox=None):
        """Crop the image buffer with the supplied bounding box and translate all objects into the cropped frame.

        If bbox is None, the scene must contain exactly one object and the first bounding box object in the scene is used.
        The translation back to the uncropped frame is recorded in the undo history.
        """
        assert bbox is not None or (len(self) == 1), "Bounding box must be provided if number of objects != 1"
        if bbox is None:
            bbox = [o for o in self._objectlist if isinstance(o, vipy.geometry.BoundingBox)][0]
        im = super()._crop(bbox)        
        (xoff, yoff) = (bbox.xmin(), bbox.ymin())
        im._objectlist = [bb.translate(-xoff, -yoff) for bb in im._objectlist]
        im._history('translate', dx=xoff, dy=yoff)                        
        return im

    def objectcrop(self, dilate=1.0):
        """Padcrop the image to the `vipy.image.Scene.boundingbox` dilated by the given factor.  Returns self unchanged if the scene has no bounding box"""
        bb = self.boundingbox()
        if bb is None:
            return self
        return self.padcrop(bb.dilate(dilate))

    def objectsquare(self, dilate=1.0):
        """Padcrop the image to the `vipy.image.Scene.boundingbox` dilated by the given factor and then set to maxsquare.  Returns self unchanged if the scene has no bounding box"""
        bb = self.boundingbox()
        if bb is None:
            return self
        square = bb.dilate(dilate).maxsquare()
        return self.padcrop(square)
    
    def centercrop(self, height, width):
        """Crop a (height x width) box centered on the image centroid, using crop()"""
        (xc, yc) = (float(self.width() / 2.0), float(self.height() / 2.0))
        box = BoundingBox(xcentroid=xc, ycentroid=yc, width=int(width), height=int(height))
        return self.crop(box)

    def cornercrop(self, height, width):
        """Crop a (height x width) box anchored at the upper left corner of the image, using crop()"""
        box = BoundingBox(xmin=0, ymin=0, width=int(width), height=int(height))
        return self.crop(box)
    
    def padcrop(self, bbox):
        """Crop the image buffer to the supplied bounding box, zero padding where the box extends outside the image rectangle, and update all objects.

        The supplied box is cloned and not modified.  The translation back to the padded frame is recorded in the undo history.
        """
        box = bbox.clone()
        padx = int(max(0, max(0-box.xmin(), box.xmax()-self.width())))
        pady = int(max(0, max(0-box.ymin(), box.ymax()-self.height())))
        padded = self.zeropad(padx, pady)
        padded._crop(box.translate(dx=padx, dy=pady))  # box is now in padded image coordinates
        (xoff, yoff) = (box.xmin(), box.ymin())
        self._objectlist = [bb.translate(-xoff, -yoff) for bb in self._objectlist] # after crop        
        self._history('translate', dx=xoff, dy=yoff)                                
        return self

    def cornerpadcrop(self, height, width):
        """Padcrop a (height x width) box anchored at the upper left corner of the image, zero padding out to (height, width)"""
        box = BoundingBox(xmin=0, ymin=0, width=width, height=height)
        return self.padcrop(box)
    
    # Image export
    def rectangular_mask(self, W=None, H=None):
        """Return a binary array of the same size as the image (or using the
        provided image width and height (W,H) size to avoid an image load),
        with ones inside all bounding boxes"""
        if (W is None or H is None):
            (H, W) = (int(np.round(self.height())),
                      int(np.round(self.width())))
        immask = np.zeros((H, W)).astype(np.uint8)
        for o in self._objectlist:
            if isinstance(o, vipy.geometry.BoundingBox) and o.hasoverlap(immask):
                bbm = o.clone().imclip(self.numpy()).int()
                immask[bbm.ymin():bbm.ymax(), bbm.xmin():bbm.xmax()] = 1
            if isinstance(o, vipy.geometry.Point2d) and o.boundingbox().hasoverlap(immask):
                mask = vipy.calibration.circle(o.x, o.y, o.r, W, H)
                immask[mask>0] = 1
        return immask

    def binarymask(self):
        """Overwrite the image buffer in place with the rectangular_mask (broadcast across channels for multi-channel images), returns self"""
        single_channel = (self.channels() == 1)
        mask = self.rectangular_mask()
        if not single_channel:
            mask = np.expand_dims(mask, axis=2)
        pixels = self.numpy()
        pixels[:] = mask[:]  # in-place update
        return self
        
    def bgmask(self):
        """Zero all pixels outside of the rectangular_mask in place, returns self"""
        single_channel = (self.channels() == 1)
        mask = self.rectangular_mask()
        if not single_channel:
            mask = np.expand_dims(mask, axis=2)
        pixels = self.numpy()
        pixels[:] = np.multiply(pixels, mask)  # in-place update
        return self  

    def fgmask(self):
        """Zero out the foreground: every pixel inside the `rectangular_mask` of the objects is set to zero.

        Returns:
            This object, with the pixel buffer updated in-place
        """
        singlechannel = (self.channels() == 1)
        mask = self.rectangular_mask()
        if not singlechannel:
            mask = np.expand_dims(mask, axis=2)  # broadcast the mask over channels
        keep = 1.0-mask  # one outside the objects, zero inside
        pixels = self.numpy()
        pixels[:] = np.multiply(pixels, keep)  # in-place update
        return self
    
    def pixelmask(self, pixelsize=8):
        """Pixelate the foreground: pixels inside the object mask are replaced with a coarse, blocky version of the image (e.g. like privacy glass).

        Args:
            pixelsize: [number > 1] Scale factor.  A clone is rescaled by 1/pixelsize and resized back to this image size, both with nearest neighbor interpolation

        Returns:
            This object, with the pixel buffer updated in-place
        """
        assert pixelsize > 1, "Pixelsize is a scale factor such that pixels within the foreground are pixelsize times larger than the background"
        pixels = self.numpy()  # force writeable
        foreground = self.rectangular_mask() > 0
        coarse = self.clone().rescale(1.0/pixelsize, interp='nearest').resize_like(self, interp='nearest').numpy()
        pixels[foreground] = coarse[foreground]  # in-place update
        return self

    def pixelize(self, radius=16):
        """Alias for `pixelmask`, with `radius` forwarded as the pixelsize"""
        pixelsize = radius
        return self.pixelmask(pixelsize=pixelsize)
    def pixelate(self, radius=16):
        """Alias for `pixelmask`, with `radius` forwarded as the pixelsize"""
        pixelsize = radius
        return self.pixelmask(pixelsize=pixelsize)
        
    
    def blurmask(self, radius=7):
        """Blur the foreground: pixels inside the object mask are replaced with the same pixels from a blurred clone.

        Args:
            radius: Blur radius forwarded to `blur` on a clone of this image

        Returns:
            This object, with the pixel buffer updated in-place
        """
        pixels = self.numpy()  # force writeable
        foreground = self.rectangular_mask() > 0
        blurred = self.clone().blur(radius).numpy()
        pixels[foreground] = blurred[foreground]  # in-place update
        return self

    def blurmask_only(self, categories, radius=7):
        """Replace pixels within the foreground objects of the specified categories with a privacy preserving blurred foreground.

        Args:
            categories: [list] Only objects whose category() is in this collection are blurred
            radius: [number > 1] Blur radius forwarded to `blurmask`

        Returns:
            A clone of this image blurred within the selected objects, with the full original object list restored

        Raises:
            AssertionError: if radius is not greater than one
        """
        # The previous message was copied from pixelmask and described a pixel size rather than the blur radius
        assert radius > 1, "Radius must be greater than one"

        objects = self.objects()
        selected = [o for o in objects if o.category() in categories]
        # Temporarily narrow the object list of the clone so that blurmask only sees the selected objects
        return self.clone().objects(selected).blurmask(radius=radius).objects(objects)
    
    def replace(self, newim, broadcast=False):
        """Overwrite the pixels inside every object box with pixels taken from a replacement image.

        Args:
            newim: [`vipy.image.Image`] The replacement image
            broadcast: [bool] If false, copy the same box region out of newim.  If true, resize a clone of newim to the box size and paste the whole resized image into the box

        Returns:
            This object, with the pixel buffer updated in-place

        .. notes::
            - The return value of imclip() is not captured, so this relies on imclip() updating each stored box in place (clipped to both images)
        """
        assert isinstance(newim, vipy.image.Image), "Invalid replacement image - Must be vipy.image.Image"
        (target, source) = (self.numpy(), newim.array())
        for box in self._objectlist:
            box.imclip(source).imclip(target)
            rows = slice(int(box.ymin()), int(box.ymax()))
            cols = slice(int(box.xmin()), int(box.xmax()))
            if broadcast:
                patch = newim.clone().resize(int(box.width()), int(box.height())).array()
            else:
                patch = source[rows, cols]
            target[rows, cols] = patch
        return self
    
    def meanmask(self):
        """Fill the foreground: pixels inside the object mask are set to the value returned by meanchannel() (the mean pixel color).

        Returns:
            This object, with the pixel buffer updated in-place
        """
        pixels = self.numpy()  # force writeable
        fill = self.meanchannel()
        foreground = self.rectangular_mask() > 0
        pixels[foreground] = fill  # in-place update
        return self

    
    def perceptualhash(self, bits=128, asbinary=False, asbytes=False, objmask=False):
        """Perceptual differential hash function.

        A clone of the image is optionally mean masked, resized to a small (sq+1) x (sq+1) image, converted to greyscale, and the signs of the
        gradients along each axis are flattened into a binary code.

        Args:
            bits: [int]  Must be of the form 2*k*k for k in 2..16.  Longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
            objmask: [bool] if true, replace the foreground object masks with the mean color (`meanmask`) prior to computing
            asbinary: [bool] If true, return a binary array
            asbytes: [bool] if true return a byte array.  This takes precedence over asbinary when both are set

        Returns:
            By default, a hex string encoding the packed hash such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
            asbytes: the packed hash as bytes
            asbinary: a flattened numpy boolean array

        .. notes::
            - Can be used for near duplicate detection of background scenes by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.  Equivalently, `vipy.image.Image.perceptualhash_distance`.
            - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex( bghash() )) which is equivalent to perceptualhash(asbinary=True)

        """
        allowablebits = [2*k*k for k in range(2, 17)]
        assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
        sq = int(np.ceil(np.sqrt(bits/2.0)))
        im = self.clone().meanmask() if objmask else self.clone()
        grey = im.resize(cols=sq+1, rows=sq+1).greyscale().numpy()
        signs = np.dstack(np.gradient(grey))[0:-1, 0:-1] > 0  # drop the last row and column, keep gradient signs only
        b = signs.flatten()
        if asbytes:
            return bytes(np.packbits(b))
        if asbinary:
            return b
        return bytes(np.packbits(b)).hex()

    def fghash(self, bits=8, asbinary=False, asbytes=False):
        """Perceptual differential hash function, computed for each foreground region independently.

        Returns:
            A list with one `perceptualhash` (objmask=False) per element yielded by iterating this object, each computed on that element's crop()
        """
        hashes = []
        for im in self:
            region = im.crop()
            hashes.append(region.perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=False))
        return hashes

    
    def bghash(self, bits=128, asbinary=False, asbytes=False):
        """Perceptual differential hash function of the background, masking out foreground regions.

        A greyscale clone is hashed with `perceptualhash` using objmask=True.  Arguments are forwarded unchanged.
        """
        grey = self.clone().greyscale()
        return grey.perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=True)
        
    def isduplicate(self, im, threshold, bits=128):
        """Background hash near duplicate detection.

        Args:
            im: [`vipy.image.Image`] The image to compare with
            threshold: The distance below which the two images are declared near duplicates
            bits: [int] Hash length forwarded to `bghash` for both images

        Returns:
            True if the `perceptualhash_distance` between the bghash of self and im is strictly less than threshold
        """
        assert isinstance(im, Image), "Invalid input"
        (myhash, otherhash) = (self.bghash(bits=bits), im.bghash(bits=bits))
        distance = vipy.image.Image.perceptualhash_distance(myhash, otherhash)
        return distance < threshold
    
        
    def show(self, categories=None, figure=1, nocaption=False, nocaption_withstring=[], fontsize=10, boxalpha=0.15, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, captionoffset=(3,-18), nowindow=False, shortlabel=None, timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Show scene detection 

        Args:
           - categories: [list]  List of category names in the scene to show.  If None, all objects are shown
           - fontsize: [int] or [str]: Size of the font, fontsize=int for points, fontsize='NN:scaled' to scale the font relative to the image size (NN is multiplied by the smallest image dimension divided by 640)
           - figure: [int|str] Figure number or title, show the image in the provided figure=int numbered window
           - nocaption: [bool]  Show or do not show the text caption in the upper left of the box 
           - nocaption_withstring: [list]:  Do not show captions for those object categories containing any of the strings in the provided list
           - boxalpha (float, [0,1]):  Set the text box background to be semi-transparent with an alpha
           - d_category2color (dict):  Define a dictionary of required mapping of specific category() to box colors.  Non-specified categories are assigned a named color from vipy.show.colorlist(), selected by a hash of the category name so that the color is consistent across calls
           - captionoffset (int, int): The relative position of the caption to the upper right corner of the box.
           - nowindow (bool):  Display or not display the image, used by `vipy.image.Scene.annotation`
           - shortlabel (dict):  An optional dictionary mapping category names to short names easier to display 
           - timestamp: Forwarded unchanged to `vipy.show.imobjects`
           - mutator (lambda):  A lambda function with signature lambda im: f(im) which will modify this image prior to show.  Useful for changing labels on the fly
           - timestampoffset (tuple): (x,y) coordinate offsets to shift the upper left corner timestamp
           - theme [str]: If 'dark' use dark mode, if 'light' use light mode to visualize captions with high contrast dark or light foregrounds 

        Returns:
            This object.  Display operates on a clone, so the object list of this image is not relabeled by mutator or shortlabel.
        """
        colors = vipy.show.colorlist(theme)
        all_colors = vipy.show.colorlist()
        # Any theme other than 'dark' is rendered with the light settings
        textfacecolor = 'black' if theme=='dark' else 'white'
        timestampcolor = 'white' if theme=='dark'  else 'black'
        timestampfacecolor = 'black' if theme=='dark' else 'white'
        textfacealpha = 0.8 if theme=='dark' else 0.85
        
        im = self.clone() if not mutator else mutator(self.clone())
        imdisplay = im.rgb() if im.colorspace() != 'rgb' else im.load()  # convert to RGB for show() if necessary
        
        valid_objects = [obj.clone() for obj in imdisplay.objects() if categories is None or obj.category() in tolist(categories)]  # Objects with valid category
        # NOTE(review): clipping uses the pixels of self rather than imdisplay - confirm this is intended when a mutator changes the image size
        valid_objects = [obj.imclip(self.numpy()) for obj in valid_objects if obj.hasoverlap(self.numpy())]  # Objects within image rectangle
        valid_objects = [obj.new_category(shortlabel[obj.category()]) for obj in valid_objects] if shortlabel else valid_objects  # Display name as shortlabel?
        # Hash the category name into the color list so that a category always maps to the same color
        d_det_category_to_color = {d.category():colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(colors)] for d in valid_objects if isinstance(d, vipy.object.Detection)}
        d_kp_category_to_color = {d.category():all_colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(all_colors)] for d in valid_objects if isinstance(d, vipy.object.Keypoint2d)}
        d_category_to_color = mergedict(d_kp_category_to_color, d_det_category_to_color, d_category2color)
        
        # Colors are looked up before captions are stripped, so that uncaptioned objects keep their category color
        object_color = [d_category_to_color[d.category()] for d in valid_objects]
        valid_objects  = [d if not any([c in d.category() for c in tolist(nocaption_withstring)]) else d.nocategory() for d in valid_objects]  # Objects requested to show without caption

        fontsize_scaled = float(fontsize.split(':')[0])*(min(imdisplay.shape())/640.0) if isstring(fontsize) else fontsize
        vipy.show.imobjects(imdisplay._array, valid_objects, bordercolor=object_color, textcolor=object_color, fignum=figure, do_caption=(nocaption==False), facealpha=boxalpha, fontsize=fontsize_scaled,
                            captionoffset=captionoffset, nowindow=nowindow, textfacecolor=textfacecolor, textfacealpha=textfacealpha, timestamp=timestamp,
                            timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestampoffset=timestampoffset)
        return self

    def annotate(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Alias for `vipy.image.Scene.savefig`.

        Every argument is forwarded by keyword to `savefig` unchanged, and the result of `savefig` is returned.
        """
        forwarded = dict(outfile=outfile,
                         categories=categories,
                         figure=figure,
                         nocaption=nocaption,
                         fontsize=fontsize,
                         boxalpha=boxalpha,
                         d_category2color=d_category2color,
                         captionoffset=captionoffset,
                         dpi=dpi,
                         shortlabel=shortlabel,
                         nocaption_withstring=nocaption_withstring,
                         timestamp=timestamp,
                         theme=theme,
                         timestampoffset=timestampoffset,
                         mutator=mutator)
        return self.savefig(**forwarded)

    def savefig(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, textfacecolor='white', shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Save `vipy.image.Scene.show` output to given file or return buffer without popping up a window.

        Args:
            outfile: [str] Path of the image file to write.  If None, the rendered figure is returned as an image instead
            figure: [int] Figure number to render into.  If None, figure 1 is used, and closed after rendering when returning a buffer
            dpi: [int] Resolution passed to `vipy.show.savefig`, used only when outfile is provided
            textfacecolor: Accepted for backwards compatibility, not forwarded to `show` (which derives it from theme)
            All other arguments are forwarded to `vipy.image.Scene.show` with nowindow=True

        Returns:
            outfile if it was provided, otherwise a `vipy.image.Image` in RGB colorspace containing the rendered figure
        """
        fignum = figure if figure is not None else 1
        self.show(categories=categories, figure=fignum, nocaption=nocaption, fontsize=fontsize, boxalpha=boxalpha,
                  d_category2color=d_category2color, captionoffset=captionoffset, nowindow=True,
                  shortlabel=shortlabel, nocaption_withstring=nocaption_withstring, timestamp=timestamp,
                  mutator=mutator, timestampoffset=timestampoffset, theme=theme)

        if outfile is None:
            canvas = plt.figure(num=fignum).canvas
            buf = io.BytesIO()
            (W,H) = canvas.get_width_height()  # fast(ish)
            canvas.print_raw(buf)  # fast(ish), FIXME: there is a bug here with captions showing behind bboxes on macos
            img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))  # raw canvas is four channel (RGBA)
            if figure is None:
                vipy.show.close(plt.gcf().number)   # memory cleanup (useful for video annotation on last frame)
            return vipy.image.Image(array=img, colorspace='rgba').rgb()

        # Save the figure that was actually rendered above: use the resolved fignum, not the raw
        # figure argument, which may be None
        vipy.show.savefig(os.path.abspath(os.path.expanduser(outfile)), fignum, dpi=dpi, bbox_inches='tight', pad_inches=0)
        return outfile

        
    
class ImageDetection(Scene):
    """vipy.image.ImageDetection class

    This class provides a representation of a `vipy.image.Image` with a single `vipy.object.Detection`.  This is useful for direct bounding box manipulations.

    This class inherits from `vipy.image.Scene`, and stores the detection as the single entry of the scene object list.

    Inheritance priority is for Image.  Overloaded methods such as rescale() or width() will transform or return values for the Image.

    Valid constructors include all provided by vipy.image.Image and BoundingBox coordinates

    ```python
    im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xmin=0, ymin=0, width=100, height=100)
    im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xmin=0, ymin=0, xmax=100, ymax=100)
    im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xcentroid=50, ycentroid=50, width=100, height=100)
    ```

    .. notes::
        - The inheritance resolution order will prefer the subclass methods for `vipy.image.Image`.  For example, the shape() method will return the image shape.
        - Use `vipy.image.DetectionImage` or `vipy.image.ImageDetection.detectionimage` cast if you prefer overloaded methods to resolve to bounding box manipulation..
        - All methods in this class will transform the pixels or the box independently.  The use case for this class is to manipulate boxes relative to the image for refinement (e.g. data augmentation).
        - If you want the pixels to be transformed along with the boxes, use the `vipy.image.ImageDetection.scene` method to cast this to a `vipy.image.Scene` object.
    """

    def __init__(self, filename=None, url=None, attributes=None, colorspace=None, array=None,
                 xmin=None, xmax=None, ymin=None, ymax=None, width=None, height=None,
                 xcentroid=None, ycentroid=None, category=None, xywh=None, ulbr=None, bbox=None, id=True):
        """Construct the image from (filename, url, attributes, array, colorspace) and add a single detection built from the box arguments.

        Args:
            bbox: [`vipy.geometry.BoundingBox`] Optional box, used as xywh only if xywh is not provided
            attributes: [dict] Passed to both the image and the detection
            All remaining box arguments (xmin, ymin, xmax, ymax, width, height, xcentroid, ycentroid, xywh, ulbr, category, id) are forwarded to `vipy.object.Detection`
        """
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)

        if xywh is None and isinstance(bbox, BoundingBox):
            xywh = bbox.xywh()  # an explicit xywh takes precedence over bbox

        self.add_object(vipy.object.Detection(xmin=xmin,
                                              ymin=ymin,
                                              width=width,
                                              height=height,
                                              xmax=xmax,
                                              ymax=ymax,
                                              xcentroid=xcentroid,
                                              ycentroid=ycentroid,
                                              xywh=xywh,
                                              ulbr=ulbr,
                                              category=category,
                                              attributes=attributes,
                                              id=id))

    def __repr__(self):
        return str('<vipy.image.ImageDetection: %s, %s>' % (super().__repr__(), self._objectlist[0].__repr__()))

    def __eq__(self, other):
        """ImageDetection equality is defined as equal boundingbox() (not pixels).  Anything that is not an ImageDetection compares unequal"""
        return self.boundingbox() == other.boundingbox() if isinstance(other, ImageDetection) else False

    def num_objects(self):
        """An ImageDetection always reports exactly one object"""
        return 1

    @classmethod
    def from_json(cls, s):
        """Construct an ImageDetection from a JSON string or an already decoded dictionary.

        Args:
            s: [str|dict] JSON string, or a dictionary with any of the keys
                ('filename', 'url', 'category', 'attributes', 'colorspace', 'array', 'xmin', 'ymin', 'xmax', 'ymax', 'id')

        Returns:
            A new instance of this class.  Missing keys are passed to the constructor as None.
        """
        # The first argument was previously named `obj` while the body referenced `cls`, which raised NameError on every call
        d = json.loads(s) if not isinstance(s, dict) else s
        array = d.get('array')
        return cls(filename=d.get('filename'),
                   url=d.get('url'),
                   category=d.get('category'),
                   attributes=d.get('attributes'),
                   colorspace=d.get('colorspace'),
                   array=np.array(array, dtype=np.uint8) if array is not None else None,
                   xmin=d.get('xmin'),
                   ymin=d.get('ymin'),
                   xmax=d.get('xmax'),
                   ymax=d.get('ymax'),
                   id=d.get('id'))

    def boundingbox(self):
        """Return a new `vipy.geometry.BoundingBox` with the (upper left, bottom right) coordinates of the detection"""
        return vipy.geometry.BoundingBox(ulbr=self._objectlist[0].ulbr())

    def crop(self):
        """Crop the image using the bounding box and return a `vipy.image.Image` for the cropped pixels"""
        return vipy.image.Image.cast(self.clone())._crop(self.boundingbox())

    
    
def mutator_show_trackid(n_digits_in_trackid=None):
    """Mutate the image to show track ID with a fixed number of digits appended to the category as (####).

    Args:
        n_digits_in_trackid: [int] Keep only this many leading characters of the '__trackid' attribute.  None keeps all of them

    Returns:
        A callable f(im, k=None) which relabels every object that has a '__trackid' attribute, and leaves the others unchanged
    """
    def _relabel(o):
        if not o.has_attribute('__trackid'):
            return o
        return o.new_category('%s (%s)' % (o.category(), o.attributes['__trackid'][0:n_digits_in_trackid]))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_trackindex():
    """Mutate the image to show track index appended to the category as (####).

    Returns:
        A callable f(im, k=None) which relabels every object that has a '__track_index' attribute, and leaves the others unchanged
    """
    def _relabel(o):
        if not o.has_attribute('__track_index'):
            return o
        return o.new_category('%s (%d)' % (o.category(), int(o.attributes['__track_index'])))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_track_only():
    """Mutate the image to show track as a consistently colored box with no categories.

    Returns:
        A callable f(im, k=None) which first applies `mutator_show_trackindex`, then prefixes every category with '__'
    """
    f = mutator_show_trackindex()

    def _hide(o):
        return o.new_category('__%s' % o.category())  # prepending __ will not show it, but will color boxes correctly

    def _mutate(im, k=None, f=f):
        return f(im).objectmap(_hide)

    return _mutate
    
def mutator_show_noun_only(nocaption=False):
    """Mutate the image to show the noun only.

    Args:
        nocaption: [bool] If true, then do not display the caption, only consistently colored boxes for the noun.

    Returns:
        A callable f(im, k=None) which sets the category of every object with a '__track_category' attribute to its newline joined nouns

    ..note:: To color boxes by track rather than noun, use `vipy.image.mutator_show_track_only`
    """
    def _relabel(o):
        if not o.has_attribute('__track_category'):
            return o
        nouns = [('__'+n if nocaption else n) for n in o.attributes['__track_category']]
        return o.new_category('\n'.join(nouns))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_verb_only():
    """Mutate the image to show the verb only.

    Returns:
        A callable f(im, k=None) which sets the category of every object with an '__activity_category' attribute to its newline joined verbs
    """
    def _relabel(o):
        if not o.has_attribute('__activity_category'):
            return o
        verbs = list(o.attributes['__activity_category'])
        return o.new_category('\n'.join(verbs))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_noun_or_verb():
    """Mutate the image to show the verb only if it is non-empty, else the noun.

    Returns:
        A callable f(im, k=None) which relabels every object that has both the '__track_category' and '__activity_category' attributes,
        pairing nouns and verbs in order and joining the selected labels with newlines
    """
    def _relabel(o):
        if not (o.has_attribute('__track_category') and o.has_attribute('__activity_category')):
            return o
        pairs = zip(o.attributes['__track_category'], o.attributes['__activity_category'])
        return o.new_category('\n'.join([v if len(v)>0 else n for (n,v) in pairs]))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_noun_verb():
    """Mutate the image to show the category as 'Noun Verb1\nNoun Verb2'.

    Returns:
        A callable f(im, k=None) which relabels every object that has both the '__track_category' and '__activity_category' attributes.
        The noun is capitalized, a verb that starts with the noun has its first 'noun_' prefix removed, and underscores are shown as spaces.
    """
    def _caption(n, v):
        noun = n.capitalize().replace('_',' ')
        verb = v.replace('%s_'%n.lower(),'',1) if v.lower().startswith(n.lower()) else v
        return '%s %s' % (noun, verb.replace('_',' '))

    def _relabel(o):
        if not (o.has_attribute('__track_category') and o.has_attribute('__activity_category')):
            return o
        pairs = zip(o.attributes['__track_category'], o.attributes['__activity_category'])
        return o.new_category('\n'.join([_caption(n, v) for (n,v) in pairs]))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate
    
def mutator_show_trackindex_verbonly(confidence=True, significant_digits=2):
    """Mutate the image to show boxes colored by track index, and only show 'verb' captions with activity confidence, sorted in decreasing order.

    Args:
        confidence: [bool] If True, append the activity confidence in parentheses to each verb that has a confidence
        significant_digits: [int] Number of digits shown after the decimal point of the confidence

    Returns:
        A callable f(im, k=None) which first applies `mutator_show_trackindex`, then for every object:
        hides the caption (category prefixed with '__') if there is a single track category with an empty activity,
        otherwise sets the category to the newline joined verbs sorted by decreasing confidence.
        Objects that do not have all of the '__track_category', '__activity_category' and '__activity_conf' attributes are left unchanged.
    """
    f = mutator_show_trackindex()
    required = ('__track_category', '__activity_category', '__activity_conf')

    def _relabel(o):
        # Consistent with the other mutators: do not fail on objects that were not generated from tracks and activities
        if not all(o.has_attribute(a) for a in required):
            return o
        (nouns, verbs, confs) = (o.attributes[a] for a in required)
        if len(nouns) == 1 and len(verbs[0]) == 0:
            return o.new_category('__%s' % o.category())  # no activity: colored box without a caption
        ranked = sorted(zip(nouns, verbs, confs), key=lambda x: float(x[2]) if x[2] else 0, reverse=True)
        return o.new_category('\n'.join(['%s %s' % (v, ('(%1.*f)' % (significant_digits, float(c))) if (confidence is True and c is not None) else '')
                                         for (n,v,c) in ranked]))

    def _mutate(im, k=None, f=f):
        return f(im).objectmap(_relabel)

    return _mutate


def RandomImage(rows=None, cols=None):
    """Return a uniform random color `vipy.image.Image` of size (rows, cols).

    Args:
        rows: [int] Image height.  If None, drawn with np.random.randint(128, 1024)
        cols: [int] Image width.  If None, drawn with np.random.randint(128, 1024)
    """
    if rows is None:
        rows = np.random.randint(128, 1024)
    if cols is None:
        cols = np.random.randint(128, 1024)
    pixels = np.uint8(255 * np.random.rand(rows, cols, 3))
    return Image(array=pixels, colorspace='rgb')


def RandomImageDetection(rows=None, cols=None):
    """Return a uniform random color `vipy.image.ImageDetection` of size (rows, cols) with a random bounding box.

    Args:
        rows: [int] Image height.  If None, drawn with np.random.randint(128, 1024)
        cols: [int] Image width.  If None, drawn with np.random.randint(128, 1024)

    .. notes::
        - The box corner and the box size are drawn independently, so the box may extend past the image boundary
    """
    if rows is None:
        rows = np.random.randint(128, 1024)
    if cols is None:
        cols = np.random.randint(128, 1024)
    pixels = np.uint8(255 * np.random.rand(rows, cols, 3))
    (xmin, ymin) = (np.random.randint(0,cols - 16), np.random.randint(0,rows - 16))
    (width, height) = (np.random.randint(16,cols), np.random.randint(16,rows))
    return ImageDetection(array=pixels, colorspace='rgb', category='RandomImageDetection',
                          xmin=xmin, ymin=ymin, width=width, height=height)

def RandomScene(rows=None, cols=None, num_detections=8, num_keypoints=8, num_tags=4, num_objects=None, url=None):
    """Return a uniform random color `vipy.image.Scene` of size (rows, cols) with a specified number of `vipy.object.Object` objects.

    Args:
        rows: [int] Image height, forwarded to `RandomImage`.  Ignored if url is provided
        cols: [int] Image width, forwarded to `RandomImage`.  Ignored if url is provided
        num_detections: [int] Number of random `vipy.object.Detection` objects
        num_keypoints: [int] Number of random `vipy.object.Keypoint2d` objects
        num_tags: [int] Number of random soft tags
        num_objects: [int] If provided, overrides num_detections and num_keypoints with num_objects//2 each
        url: [str] If provided, construct the scene from this url instead of random pixels

    Returns:
        A `vipy.image.Scene` with the random objects set
    """
    im = Scene(array=RandomImage(rows, cols).array()) if url is None else Scene(url=url)
    (rows, cols) = im.shape()
    objects = []
    if num_objects:
        # This previously assigned to a misspelled `num_detection`, so num_objects never changed the number of detections
        (num_detections, num_keypoints) = (num_objects//2, num_objects//2)
    if num_detections:
        objects += [vipy.object.Detection('obj%04d' % k, xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16), width=np.random.randint(16,cols), height=np.random.randint(16,rows), confidence=float(np.random.rand())) for k in range(num_detections)]
    if num_keypoints:
        objects += [vipy.object.Keypoint2d(category='kp%02d' % (k%20), x=np.random.randint(0,cols - 16), y=np.random.randint(0,rows - 16), radius=np.random.randint(0.01*cols, 0.1*cols), confidence=float(np.random.rand())) for k in range(num_keypoints)]
    if num_tags:
        im.add_soft_tags([('tag%d'%k, float(np.random.rand())) for k in range(num_tags)])

    return im.objects(objects)
    

def owl():
    """Return a superb owl image for testing.

    Returns:
        A `vipy.image.Scene` constructed from a wikimedia url with a single 'Great Horned Owl' detection, after mindim(1024)
    """
    url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/2/23/Bubo_virginianus_06.jpg/512px-Bubo_virginianus_06.jpg'
    detection = vipy.object.Detection('Great Horned Owl', xmin=93, ymin=85, width=373, height=560)
    im = Scene(url=url,
               filename=vipy.util.tocache('owl.jpg'),  # to avoid redownload
               objects=[detection])
    return im.mindim(1024)

def vehicles():
    """Return a highway scene with the four highest confidence vehicle detections for testing.

    Returns:
        A `vipy.image.Scene` of category 'Highway' constructed from a wikimedia url with four 'car' detections, after mindim(1024)
    """
    boxes = [(473.0, 592.2, 92.4, 73.4),
             (1410.0, 756.1, 175.2, 147.3),
             (316.9, 640.1, 119.4, 119.5),
             (886.9, 892.9, 223.8, 196.6)]
    im = Scene(url='https://upload.wikimedia.org/wikipedia/commons/3/3e/I-80_Eastshore_Fwy.jpg',
               filename=vipy.util.tocache('vehicles.jpg'),  # to avoid redownload
               category='Highway',
               objects=[vipy.object.Detection(category="car", xywh=xywh) for xywh in boxes])
    return im.mindim(1024)

def people():
    """Return a crowd scene with the four highest confidence person detections for testing.

    Returns:
        A `vipy.image.Scene` of category 'crowd' constructed from a wikimedia url with four 'person' detections, after mindim(1024)
    """
    boxes = [(1.8, 1178.7, 574.1, 548.0),
             (1589.4, 828.3, 363.0, 887.7),
             (1902.9, 783.1, 250.8, 825.8),
             (228.2, 948.7, 546.8, 688.5)]
    im = Scene(url='https://upload.wikimedia.org/wikipedia/commons/b/be/July_4_crowd_at_Vienna_Metro_station.jpg',
               filename=vipy.util.tocache('people.jpg'),  # to avoid redownload
               category='crowd',
               objects=[vipy.object.Detection(category="person", xywh=xywh) for xywh in boxes])
    return im.mindim(1024)

    
    
class Transform():
    """Transforms are static methods that implement common transformation patterns used in distributed processing.

       These are useful for parallel processing of noisy or corrupted images when anonymous functions are not supported (e.g. multiprocessing)

       See also: `vipy.dataset.Dataset.minibatch` for parallel processing of batches of images for downloading, loading, resizing, cropping, augmenting, tensor prep etc.

       The best-effort transforms (load, download, mindim, thumbnail, saveas, annotate) operate on a clone and return im.flush() if the transform raises.
       Only `Exception` is handled, so that KeyboardInterrupt and SystemExit propagate to the caller and a worker can be interrupted.
    """

    @staticmethod
    def load(im):
        """Return a loaded clone of im, or im.flush() on error"""
        try:
            return im.clone().load()
        except Exception:
            return im.flush()

    @staticmethod
    def download(im):
        """Return a downloaded clone of im, or im.flush() on error"""
        try:
            return im.clone().download()
        except Exception:
            return im.flush()

    @staticmethod
    def is_loaded(im):
        """Return im.is_loaded()"""
        return im.is_loaded()

    @staticmethod
    def mindim(im, mindim=256):
        """Return a loaded clone of im after mindim(mindim), or im.flush() on error"""
        try:
            return im.clone().load().mindim(mindim)
        except Exception:
            return im.flush()

    @staticmethod
    def thumbnail(im, mindim=64, outfile=None):
        """Save a loaded clone of im after mindim(mindim) to outfile (default: a new random jpg filename from tocache) and return the result of save(), or im.flush() on error"""
        try:
            return im.clone().load().mindim(mindim).save(outfile if outfile else tocache(shortuuid(8)+'.jpg'))
        except Exception:
            return im.flush()

    @staticmethod
    def saveas(im, filename):
        """Return the result of saveas(filename) on a loaded clone of im, or im.flush() on error"""
        try:
            return im.clone().load().saveas(filename)
        except Exception:
            return im.flush()

    @staticmethod
    def annotate(im, mindim=64, outfile=None):
        """Save the annotation of a loaded clone of im after mindim(mindim) to outfile (default: a new random jpg filename from tocache) and return the result of save(), or im.flush() on error"""
        try:
            return im.clone().load().mindim(mindim).annotate().save(outfile if outfile else tocache(shortuuid(8)+'.jpg'))
        except Exception:
            return im.flush()

    @staticmethod
    def centersquare_32x32_normalized(im):
        """If im is not loaded, return a clone that is loaded, converted to rgb, center square cropped, resized to 32x32 and scaled by 1/255.  A loaded im is returned unchanged"""
        return im.clone().load().rgb().centersquare().resize(32,32).gain(1/255) if not im.loaded() else im

    @staticmethod
    def centersquare_32x32_lum_normalized(im):
        """If im is not loaded, return a clone that is loaded, center square cropped, converted to lum, resized to 32x32 and scaled by 1/255.  A loaded im is returned unchanged"""
        return im.clone().load().centersquare().lum().resize(32,32).gain(1/255) if not im.loaded() else im

    @staticmethod
    def centersquare_256x256_normalized(im):
        """If im is not loaded, return a clone that is loaded, converted to rgb, center square cropped, resized to 256x256 and scaled by 1/255.  A loaded im is returned unchanged"""
        return im.clone().load().rgb().centersquare().resize(256,256).gain(1/255) if not im.loaded() else im

    @staticmethod
    def mindim256_normalized(im):
        """If im is not loaded, return a clone that is loaded, converted to rgb, resized with mindim(256) and scaled by 1/255.  A loaded im is returned unchanged"""
        return im.clone().load().rgb().mindim(256).gain(1/255) if not im.loaded() else im

    @staticmethod
    def tensor(image, shape=None, gain=None, mindim=None, colorspace=None, centersquare=None, tensor=None, ignore_errors=False, jitter=None, num_augmentations=None):
        """Apply a configurable chain of transforms to a clone of image.

        The steps are applied in this order, each only if requested: colorspace, jitter, centersquare, shape, mindim, gain, tensor.

        Args:
            image: The image to transform, which is cloned first
            shape: [tuple] If not None, resize(*shape)
            gain: [float] If not None, gain(gain)
            mindim: [int] If set, mindim(mindim)
            colorspace: [str] One of 'lum', 'rgb' or 'float' to convert with the method of the same name.  Other values are ignored
            centersquare: [bool] If true, centersquare()
            tensor: [bool] If true, convert with torch()
            ignore_errors: [bool] If true, return None if any step raises an Exception, otherwise re-raise
            jitter: [str] If 'randomcrop', apply `vipy.noise.randomcrop`
            num_augmentations: [int] If set, repeat this transform (without the tensor conversion) num_augmentations+1 times and return a clone of image holding the stacked arrays

        Returns:
            The transformed image, the result of torch() if tensor is requested, a clone with a packed nd-array if num_augmentations is set, or None on an ignored error
        """
        try:
            im = image.clone()
            if colorspace == 'lum':
                im = im.lum()
            if colorspace == 'rgb':
                im = im.rgb()
            if colorspace == 'float':
                im = im.float()
            if jitter == 'randomcrop':
                import vipy.noise
                im = vipy.noise.randomcrop(im)
            if centersquare:
                im = im.centersquare()
            if shape is not None:
                im = im.resize(*shape)
            if mindim:
                im = im.mindim(mindim)
            if gain is not None:
                im = im.gain(gain)
            if tensor:
                im = im.torch()  # CHW
            if num_augmentations:
                augmentations = np.stack([np.atleast_3d(Transform.tensor(image, shape=shape, gain=gain, mindim=mindim, colorspace=colorspace, centersquare=centersquare, ignore_errors=ignore_errors, jitter=jitter).array())
                                          for k in range(num_augmentations+1)], axis=3)  # +1 for mean
                return image.clone().array(augmentations)  # packed nd-array, use im.torch('NCHW') to access

            return im

        except Exception:
            # KeyboardInterrupt and SystemExit are not subclasses of Exception, and always propagate
            if not ignore_errors:
                raise
            return None

    @staticmethod
    def to_tensor(**kwargs):
        """Return `Transform.tensor` with the provided keyword arguments bound, as a functools.partial"""
        return functools.partial(Transform.tensor, **kwargs)

    @staticmethod
    def is_transformed(im):
        """Return true if im is not None, i.e. `Transform.tensor` did not return an ignored error"""
        return im is not None

Functions

def RandomImage(rows=None, cols=None)

Return a uniform random color Image of size (rows, cols)

Expand source code Browse git

def RandomImage(rows=None, cols=None):
    """Return a uniform random color `vipy.image.Image` of size (rows, cols).

    Args:
        rows: [int] Image height.  If None, drawn with np.random.randint(128, 1024)
        cols: [int] Image width.  If None, drawn with np.random.randint(128, 1024)
    """
    if rows is None:
        rows = np.random.randint(128, 1024)
    if cols is None:
        cols = np.random.randint(128, 1024)
    pixels = np.uint8(255 * np.random.rand(rows, cols, 3))
    return Image(array=pixels, colorspace='rgb')

def RandomImageDetection(rows=None, cols=None)

Return a uniform random color ImageDetection of size (rows, cols) with a random bounding box

Expand source code Browse git

def RandomImageDetection(rows=None, cols=None):
    """Return a uniform random color `vipy.image.ImageDetection` of size (rows, cols) with a random bounding box.

    Args:
        rows: [int] Image height.  If None, drawn with np.random.randint(128, 1024)
        cols: [int] Image width.  If None, drawn with np.random.randint(128, 1024)

    .. notes::
        - The box corner and the box size are drawn independently, so the box may extend past the image boundary
    """
    if rows is None:
        rows = np.random.randint(128, 1024)
    if cols is None:
        cols = np.random.randint(128, 1024)
    pixels = np.uint8(255 * np.random.rand(rows, cols, 3))
    (xmin, ymin) = (np.random.randint(0,cols - 16), np.random.randint(0,rows - 16))
    (width, height) = (np.random.randint(16,cols), np.random.randint(16,rows))
    return ImageDetection(array=pixels, colorspace='rgb', category='RandomImageDetection',
                          xmin=xmin, ymin=ymin, width=width, height=height)

def RandomScene(rows=None, cols=None, num_detections=8, num_keypoints=8, num_tags=4, num_objects=None, url=None)

Return a uniform random color Scene of size (rows, cols) with a specified number of `vipy.object.Object` objects

Expand source code Browse git

def RandomScene(rows=None, cols=None, num_detections=8, num_keypoints=8, num_tags=4, num_objects=None, url=None):
    """Return a uniform random color `vipy.image.Scene` of size (rows, cols) with a specified number of `vipy.object.Object` objects.

    Args:
        rows: [int] image height passed to `vipy.image.RandomImage`, ignored if url is provided
        cols: [int] image width passed to `vipy.image.RandomImage`, ignored if url is provided
        num_detections: [int] number of random `vipy.object.Detection` objects to add
        num_keypoints: [int] number of random `vipy.object.Keypoint2d` objects to add
        num_tags: [int] number of random soft tags ('tag0', 'tag1', ...) to add, each with a random confidence
        num_objects: [int] if provided (and non-zero), the total number of objects.  This overrides num_detections and num_keypoints, splitting the total between them with any odd remainder going to keypoints.
        url: [str] if provided, construct the scene from this url instead of random pixels

    Returns:
        A `vipy.image.Scene` with the random objects set
    """
    im = Scene(array=RandomImage(rows, cols).array()) if url is None else Scene(url=url)
    (rows, cols) = im.shape()

    if num_objects:
        # Override the per-type counts so that num_detections + num_keypoints == num_objects
        num_detections = num_objects // 2
        num_keypoints = num_objects - num_detections

    objects = []
    if num_detections:
        objects += [vipy.object.Detection('obj%04d' % k, xmin=np.random.randint(0,cols - 16), ymin=np.random.randint(0,rows - 16), width=np.random.randint(16,cols), height=np.random.randint(16,rows), confidence=float(np.random.rand())) for k in range(num_detections)]
    if num_keypoints:
        objects += [vipy.object.Keypoint2d(category='kp%02d' % (k%20), x=np.random.randint(0,cols - 16), y=np.random.randint(0,rows - 16), radius=np.random.randint(0.01*cols, 0.1*cols), confidence=float(np.random.rand())) for k in range(num_keypoints)]
    if num_tags:
        im.add_soft_tags([('tag%d'%k, float(np.random.rand())) for k in range(num_tags)])

    return im.objects(objects)

def mutator_show_noun_only(nocaption=False)

Mutate the image to show the noun only.

Args

nocaption: [bool] If true, then do not display the caption, only consistently colored boxes for the noun.

Note: To color boxes by track rather than noun, use vipy.image.mutator_show_track_only

Expand source code Browse git

def mutator_show_noun_only(nocaption=False):
    """Mutate the image to show the noun only.

    Args:
        nocaption: [bool] If true, then do not display the caption, only consistently colored boxes for the noun.

    Returns:
        A function f(im, k=None) that replaces the category of every object that has the '__track_category' attribute with its nouns (one per line), and leaves all other objects unchanged.

    ..note:: To color boxes by track rather than noun, use `vipy.image.mutator_show_track_only`
    """
    def _relabel(o):
        if not o.has_attribute('__track_category'):
            return o
        nouns = [('__'+n if nocaption else n) for n in o.attributes['__track_category']]
        return o.new_category('\n'.join(nouns))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_noun_or_verb()

Mutate the image to show the verb only if it is non-zero else noun

Expand source code Browse git

def mutator_show_noun_or_verb():
    """Mutate the image to show the verb only if it is non-zero else noun.

    Returns:
        A function f(im, k=None) that relabels every object having both the '__track_category' and '__activity_category' attributes, pairing nouns with verbs and showing the verb when it is a non-empty string, otherwise the noun (one entry per line).  Other objects are unchanged.
    """
    def _relabel(o):
        if not (o.has_attribute('__track_category') and o.has_attribute('__activity_category')):
            return o
        pairs = zip(o.attributes['__track_category'], o.attributes['__activity_category'])
        return o.new_category('\n'.join([v if len(v)>0 else n for (n,v) in pairs]))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_noun_verb()

Mutate the image to show the category as 'Noun Verb1 Noun Verb2'

Expand source code Browse git

def mutator_show_noun_verb():
    """Mutate the image to show the category as one 'Noun Verb' pair per line.

    Returns:
        A function f(im, k=None) that relabels every object having both the '__track_category' and '__activity_category' attributes.  The noun is capitalized, a leading 'noun_' prefix is stripped from the verb, and underscores are shown as spaces.  Other objects are unchanged.
    """
    def _caption(n, v):
        # Drop the redundant "noun_" prefix from the verb (e.g. 'person_walks' -> 'walks')
        verb = v.replace('%s_'%n.lower(),'',1) if v.lower().startswith(n.lower()) else v
        return '%s %s' % (n.capitalize().replace('_',' '), verb.replace('_',' '))

    def _relabel(o):
        if not (o.has_attribute('__track_category') and o.has_attribute('__activity_category')):
            return o
        pairs = zip(o.attributes['__track_category'], o.attributes['__activity_category'])
        return o.new_category('\n'.join([_caption(n, v) for (n,v) in pairs]))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_track_only()

Mutate the image to show track as a consistently colored box with no categories

Expand source code Browse git

def mutator_show_track_only():
    """Mutate the image to show track as a consistently colored box with no categories.

    Returns:
        A function f(im, k=None) that first applies `vipy.image.mutator_show_trackindex`, then prepends '__' to every object category.
    """
    f = mutator_show_trackindex()

    def _hide_caption(o):
        # prepending __ will not show it, but will color boxes correctly
        return o.new_category('__%s' % o.category())

    def _mutate(im, k=None, f=f):
        return f(im).objectmap(_hide_caption)

    return _mutate

def mutator_show_trackid(n_digits_in_trackid=None)

Mutate the image to show track ID with a fixed number of digits appended to the category as (####)

Expand source code Browse git

def mutator_show_trackid(n_digits_in_trackid=None):
    """Mutate the image to show track ID with a fixed number of digits appended to the category as (####).

    Args:
        n_digits_in_trackid: [int] keep only this many leading characters of the track ID.  If None, show the full track ID.

    Returns:
        A function f(im, k=None) that relabels every object having the '__trackid' attribute as 'category (trackid)'.  Other objects are unchanged.
    """
    def _relabel(o):
        if not o.has_attribute('__trackid'):
            return o
        shortid = o.attributes['__trackid'][0:n_digits_in_trackid]
        return o.new_category('%s (%s)' % (o.category(), shortid))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_trackindex()

Mutate the image to show track index appended to the category as (####)

Expand source code Browse git

def mutator_show_trackindex():
    """Mutate the image to show track index appended to the category as (####).

    Returns:
        A function f(im, k=None) that relabels every object having the '__track_index' attribute as 'category (index)', with the index converted to an integer.  Other objects are unchanged.
    """
    def _relabel(o):
        if not o.has_attribute('__track_index'):
            return o
        index = int(o.attributes['__track_index'])
        return o.new_category('%s (%d)' % (o.category(), index))

    def _mutate(im, k=None):
        return im.objectmap(_relabel)

    return _mutate

def mutator_show_trackindex_verbonly(confidence=True, significant_digits=2)

Mutate the image to show boxes colored by track index, and only show 'verb' captions with activity confidence, sorted in decreasing order

Expand source code Browse git

def mutator_show_trackindex_verbonly(confidence=True, significant_digits=2):
    """Mutate the image to show boxes colored by track index, and only show 'verb' captions with activity confidence, sorted in decreasing order.

    Args:
        confidence: [bool] if True, append the activity confidence to each verb as '(0.00)' when a confidence is available
        significant_digits: [int] number of digits shown after the decimal point of the confidence

    Returns:
        A function f(im, k=None) that first applies `vipy.image.mutator_show_trackindex`, then for every object with the '__track_category' and '__activity_category' attributes:

        - hides the caption (by prepending '__') if the object has a single track with an empty activity
        - otherwise replaces the caption with one verb per line, sorted by decreasing '__activity_conf'

        Objects without these attributes are returned unchanged, consistent with the other mutators.
    """
    f = mutator_show_trackindex()

    def _relabel(o):
        if not (o.has_attribute('__track_category') and o.has_attribute('__activity_category')):
            return o  # not a tracked object with activities, nothing to relabel

        nouns = o.attributes['__track_category']
        verbs = o.attributes['__activity_category']
        if len(nouns) == 1 and len(verbs[0]) == 0:
            # prepending __ will not show the caption, but will color boxes correctly
            return o.new_category('__%s' % o.category())

        # Highest confidence first, a missing confidence sorts as zero
        ranked = sorted(zip(nouns, verbs, o.attributes['__activity_conf']), key=lambda x: float(x[2]) if x[2] else 0, reverse=True)
        return o.new_category('\n'.join(['%s %s' % (v, ('(%1.*f)' % (significant_digits, float(c))) if (confidence is True and c is not None) else '') for (n,v,c) in ranked]))

    def _mutate(im, k=None, f=f):
        return f(im).objectmap(_relabel)

    return _mutate

def mutator_show_verb_only()

Mutate the image to show the verb only

Expand source code Browse git

def mutator_show_verb_only():
    """Mutate the image to show the verb only"""
    return lambda im, k=None: (im.objectmap(lambda o: o.new_category('\n'.join([v for v in o.attributes['__activity_category']])) if o.has_attribute('__activity_category') else o))

def owl()

Return a superb owl image for testing

Expand source code Browse git

def owl():
    """Return a superb owl image for testing.

    Returns:
        A `vipy.image.Scene` backed by a wikimedia url, with a single 'Great Horned Owl' detection, after `mindim(1024)` has been applied.
    """
    owl_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/2/23/Bubo_virginianus_06.jpg/512px-Bubo_virginianus_06.jpg'
    cachefile = vipy.util.tocache('owl.jpg')  # to avoid redownload
    detection = vipy.object.Detection('Great Horned Owl', xmin=93, ymin=85, width=373, height=560)
    return Scene(url=owl_url, filename=cachefile, objects=[detection]).mindim(1024)

def people()

Return a crowd scene with the four highest confidence person detections for testing

Expand source code Browse git

def people():
    """Return a crowd scene with the four highest confidence person detections for testing.

    Returns:
        A `vipy.image.Scene` with category 'crowd' backed by a wikimedia url, with four 'person' detections, after `mindim(1024)` has been applied.
    """
    crowd_url = 'https://upload.wikimedia.org/wikipedia/commons/b/be/July_4_crowd_at_Vienna_Metro_station.jpg'
    cachefile = vipy.util.tocache('people.jpg')  # to avoid redownload
    boxes = [(1.8, 1178.7, 574.1, 548.0),
             (1589.4, 828.3, 363.0, 887.7),
             (1902.9, 783.1, 250.8, 825.8),
             (228.2, 948.7, 546.8, 688.5)]
    detections = [vipy.object.Detection(category="person", xywh=xywh) for xywh in boxes]
    return Scene(url=crowd_url, filename=cachefile, category='crowd', objects=detections).mindim(1024)

def vehicles()

Return a highway scene with the four highest confidence vehicle detections for testing

Expand source code Browse git

def vehicles():
    """Return a highway scene with the four highest confidence vehicle detections for testing.

    Returns:
        A `vipy.image.Scene` with category 'Highway' backed by a wikimedia url, with four 'car' detections, after `mindim(1024)` has been applied.
    """
    highway_url = 'https://upload.wikimedia.org/wikipedia/commons/3/3e/I-80_Eastshore_Fwy.jpg'
    cachefile = vipy.util.tocache('vehicles.jpg')  # to avoid redownload
    boxes = [(473.0, 592.2, 92.4, 73.4),
             (1410.0, 756.1, 175.2, 147.3),
             (316.9, 640.1, 119.4, 119.5),
             (886.9, 892.9, 223.8, 196.6)]
    detections = [vipy.object.Detection(category="car", xywh=xywh) for xywh in boxes]
    return Scene(url=highway_url, filename=cachefile, category='Highway', objects=detections).mindim(1024)

Classes

class Image (filename=None, url=None, array=None, colorspace=None, attributes=None)

vipy.image.Image class

The vipy image class provides a fluent, lazy interface for representing, transforming and visualizing images. The following constructors are supported:

im = vipy.image.Image(filename="/path/to/image.ext")

All image file formats that are readable by PIL are supported here.

im = vipy.image.Image(url="http://domain.com/path/to/image.ext")

The image will be downloaded from the provided url and saved to a temporary filename. The environment variable VIPY_CACHE controls the location of the directory used for saving images, otherwise this will be saved to the system temp directory.

im = vipy.image.Image(url="http://domain.com/path/to/image.ext", filename="/path/to/new/image.ext")

The image will be downloaded from the provided url and saved to the provided filename. The url() method provides optional basic authentication set for username and password

im = vipy.image.Image(array=img, colorspace='rgb')

The image will be constructed from a provided numpy array 'img', with an associated colorspace. The numpy array and colorspace can be one of the following combinations:

'rgb': uint8, three channel (red, green, blue)
'rgba': uint8, four channel (rgb + alpha)
'bgr': uint8, three channel (blue, green, red), such as is returned from cv2.imread()
'bgra': uint8, four channel
'hsv': uint8, three channel (hue, saturation, value)
'lum': uint8, one channel, luminance (8 bit grey level)
'grey': float32, one channel in range [0,1] (32 bit intensity)
'float': float32, any channel in range [-inf, +inf]

The most general colorspace is 'float' which is used to manipulate images prior to network encoding, such as applying bias.

Args

filename: a path to an image file that is readable by PIL
url: a url string to an image file that is readable by PIL
array: a numpy array of type uint8 or float32 of shape HxWxC=height x width x channels
colorspace: a string in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']
attributes: a python dictionary that is passed by reference to the image. This is useful for encoding metadata about the image. Accessible as im.attributes

Returns

A Image object

Expand source code Browse git

class Image():
    """vipy.image.Image class
    
    The vipy image class provides a fluent, lazy interface for representing, transforming and visualizing images.
    The following constructors are supported:

    ```python
    im = vipy.image.Image(filename="/path/to/image.ext")
    ```
    
    All image file formats that are readable by PIL are supported here.

    ```python
    im = vipy.image.Image(url="http://domain.com/path/to/image.ext")
    ```
    
    The image will be downloaded from the provided url and saved to a temporary filename.
    The environment variable VIPY_CACHE controls the location of the directory used for saving images, otherwise this will be saved to the system temp directory.

    ```python
    im = vipy.image.Image(url="http://domain.com/path/to/image.ext", filename="/path/to/new/image.ext")
    ```

    The image will be downloaded from the provided url and saved to the provided filename.
    The url() method provides optional basic authentication set for username and password

    ```python
    im = vipy.image.Image(array=img, colorspace='rgb')
    ```

    The image will be constructed from a provided numpy array 'img', with an associated colorspace.  The numpy array and colorspace can be one of the following combinations:

    - 'rgb': uint8, three channel (red, green, blue)
    - 'rgba':  uint8, four channel (rgb + alpha)
    - 'bgr': uint8, three channel (blue, green, red), such as is returned from cv2.imread()
    - 'bgra':  uint8, four channel
    - 'hsv':  uint8, three channel (hue, saturation, value)
    - 'lum':  uint8, one channel, luminance (8 bit grey level)
    - 'grey':  float32, one channel in range [0,1] (32 bit intensity)
    - 'float':  float32, any channel in range [-inf, +inf]
    
    The most general colorspace is 'float' which is used to manipulate images prior to network encoding, such as applying bias. 
    
    Args:
        filename: a path to an image file that is readable by PIL
        url:  a url string to an image file that is readable by PIL
        array: a numpy array of type uint8 or float32 of shape HxWxC=height x width x channels
        colorspace:  a string in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']
        attributes:  a python dictionary that is passed by reference to the image.  This is useful for encoding metadata about the image.  Accessible as im.attributes

    Returns:
        A `vipy.image.Image` object

    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')
    def __init__(self, filename=None, url=None, array=None, colorspace=None, attributes=None):
        """Construct an image from a filename, url and/or numpy array.  See the class documentation for the supported combinations."""
        # Private state: start empty so that the setters below see a consistent object
        self._loader = None     # function to load an image, set with loader() method
        self._array = None
        self._colorspace = None

        # Backing file and url
        self._filename = filename
        if url is not None:
            assert isinstance(url, str) and url.startswith(('http://', 'https://', 'scp://', 's3://'))  # faster than vipy.util.isurl()
        self._url = url

        # Pixel buffer
        if array is not None:
            assert isnumpy(array), 'Invalid Array - Type "%s" must be np.array()' % (str(type(array)))
        self.array(array)  # shallow copy

        # Guess the colorspace from the pixel buffer when it is not provided
        if not colorspace and self.isloaded():
            pixels = self._array
            if pixels.dtype == np.float32:
                # Any float32 array is treated as the general 'float' colorspace
                colorspace = 'float'
            elif pixels.dtype == np.uint8:
                if pixels.ndim == 2 or (pixels.ndim == 3 and pixels.shape[2] == 1):
                    colorspace = 'lum'  # single channel uint8
                elif pixels.ndim == 3 and pixels.shape[2] == 3:
                    colorspace = 'rgb'  # three channel uint8
        self.colorspace(colorspace)

        # Public attributes: the provided dictionary is stored by reference, not copied
        if attributes is None:
            self.attributes = {}
        else:
            assert isinstance(attributes, dict), "Attributes must be dictionary"
            self.attributes = attributes

    @classmethod
    def cast(cls, im):
        """Typecast the conformal vipy.image object im as `vipy.image.Image`.

        This is useful for downcasting `vipy.image.Scene` or `vipy.image.ImageDetection` down to an image.

        ```python
        ims = vipy.image.RandomScene()
        im = vipy.image.Image.cast(ims)
        ```

        .. note:: The new object shares the pixel array and attributes dictionary with im, they are not copied.

        """
        assert isinstance(im, vipy.image.Image), "Invalid input - must derive from vipy.image.Image"
        shared = {'filename': im._filename,
                  'url': im._url,
                  'array': im._array,
                  'colorspace': im._colorspace,
                  'attributes': im.attributes}
        return cls(**shared)


    @classmethod
    def from_dict(cls, d):
        d = {k.lstrip('_'):v for (k,v) in d.items()}  # prettyjson (remove "_" prefix to attributes)                
        return cls(filename=d['filename'] if 'filename' in d else None,
                   url=d['url'] if 'url' in d else None,
                   array=np.array(d['array'], dtype=np.uint8) if 'array' in d and d['array'] is not None else None,
                   colorspace=d['colorspace'] if 'colorspace' in d else None,
                   attributes=d['attributes'] if 'attributes' in d else None)
        
    @classmethod
    def from_uri(cls, uri):
        """Create an image object from an absolute file path or url.

        Args:
            uri: [str] a url or a path to an existing file

        Returns:
            A new object of this class with the url set if uri is a url, and the filename set if uri is a file
        """
        (is_url, is_file) = (vipy.util.isurl(uri), vipy.util.isfile(uri))
        assert is_url or is_file, "invalid path"
        return cls(url=uri if is_url else None, filename=uri if is_file else None)
    
    @classmethod
    def from_json(cls, s):
        """Import the JSON string s as an `vipy.image.Image` object.

        Args:
            s: json encoded string
        
        This will perform a round trip such that im1 == im2

        ```python
        im1 = vupy.image.RandomImage()
        im2 = vipy.image.Image.from_json(im1.json())
        assert im1 == im2
        ```

        Note: to construct from non-encoded json (e.g. a dict prior to dumps), use from_dict
        
        """
        return cls.from_dict(json.loads(s) if not isinstance(s, dict) else s)
    
    def __eq__(self, other):
        """Images are equivalent if they have the same filename, url and array.

        The checks are performed in order (type, filename, url, pixels) and stop at the first mismatch, so the pixel arrays are only compared when everything else matches.
        """
        if not isinstance(other, Image):
            return False
        if not (other.filename()==self.filename()):
            return False
        if not (other.url()==self.url()):
            return False
        return np.all(other.array() == self.array())

    def __str__(self):
        """Return the same string as __repr__"""
        return self.__repr__()

    def __iter__(self):
        """Yield single image for consistency with videos, so that iterating over an image yields this object exactly once"""
        yield self

    def __len__(self):
        """Images have length 1 always, matching the single element yielded by __iter__"""
        return 1
    
    def __array__(self):
        """Called on np.array(self) for custom array container, (requires numpy >=1.16).  Returns the result of self.numpy()"""
        return self.numpy()
    
    def __repr__(self):
        """Return a one line summary of the image: the shape and colorspace if loaded, and the filename and url if defined"""
        fields = []
        if self.isloaded():
            shape = self._array.shape
            fields += ["height=%d, width=%d, color=%s" % (shape[0], shape[1], self.colorspace())]
        elif self.has_loader():
            # Not loaded yet, but a custom loader is available to load it
            fields += ['loaded=False']
        if self.colorspace() == 'float':
            fields += ['channels=%d' % self.channels()]
        if self.filename() is not None:
            fields += ['filename=%s' % self.filename()]
        if self.hasurl():
            fields += ['url=%s' % self.url()]
        return str('<vipy.image.Image: %s>' % (', '.join(fields)))

    def sanitize(self):
        """Remove all private keys from the attributes dictionary.
        
        The attributes dictionary is useful storage for arbitrary (key,value) pairs.  However, this storage may contain sensitive information that should be scrubbed from the media before serialization.  As a general rule, any key that is of the form '__keyname' prepended by two underscores is a private key.  This is analogous to private or reserved attributes in the python lanugage.  Users should reserve these keynames for those keys that should be sanitized and removed before any serialization of this object.
        
        ```python
        assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False
        ```

        """
        self.attributes = {k:v for (k,v) in self.attributes.items() if not k.startswith('__')} if isinstance(self.attributes, dict) else self.attributes
        return self
    
    def print(self, prefix='', sleep=None):
        """Print the representation of the image and return self with an optional sleep=n seconds
        
        Useful for debugging or sequential visualization in long fluent chains.
        """
        print(prefix+self.__repr__())
        if sleep is not None:
            assert sleep > 0, "Sleep must be a non-negative number of seconds"
            time.sleep(sleep)
        return self

    def exif(self, extended=False):
        """Return the EXIF meta-data in filename as a dictionary.  Included non-base EXIF data if extended=True.  Returns empty dictionary if no EXIF exists.  Triggers download but not load.

        Args:
            extended: [bool] if True, also include the tags from every EXIF sub-directory (IFD) in `PIL.ExifTags.IFD`.  GPS tags are named using `PIL.ExifTags.GPSTAGS`, all others using `PIL.ExifTags.TAGS`.  Tags without a known name are keyed by their numeric id.

        Returns:
            A dictionary mapping EXIF tag name to value
        """
        d = {}
        if self.download().hasfilename():
            # Keep the file open while reading, and close the handle when done
            with PIL.Image.open(self.filename()) as img:
                exif = img.getexif()
                if exif is not None:
                    d = {PIL.ExifTags.TAGS[k]:v for (k,v) in exif.items() if k in PIL.ExifTags.TAGS}

                if extended and exif is not None:
                    for ifd_id in PIL.ExifTags.IFD:
                        try:
                            ifd = exif.get_ifd(ifd_id)
                        except KeyError:
                            continue  # this IFD is not present in the image

                        resolve = PIL.ExifTags.GPSTAGS if ifd_id == PIL.ExifTags.IFD.GPSInfo else PIL.ExifTags.TAGS

                        # Copy the tags for every IFD (including GPS), resolving ids to names where known
                        for k, v in ifd.items():
                            d[resolve.get(k, k)] = v
        return d
    
    def tile(self, tilewidth, tileheight, overlaprows=0, overlapcols=0):
        """Generate an image tiling.

        A tiling is a decomposition of an image into overlapping or non-overlapping rectangular regions.

        Args:
            tilewidth: [int] the image width of each tile
            tileheight: [int] the image height of each tile
            overlaprows: [int] the number of overlapping rows (height) for each tile
            overlapcols: [int] the number of overlapping columns (width) for each tile

        Returns:
            A list of `vipy.image.Image` objects such that each image is a single tile and the set of these tiles forms the original image
            Each image in the returned list contains the 'tile' attribute which encodes the crop used to create the tile.

        .. note::
            - `vipy.image.Image.tile` can be undone using `vipy.image.Image.untile`
            - The identity tiling is im.tile(im.width(), im.height(), overlaprows=0, overlapcols=0)
            - The crop box of tiles at the right and bottom edges is clipped to the image boundary
            - Tiles are ordered column by column: all tiles at the first x offset from top to bottom, then the next x offset
        """
        assert tilewidth > 0 and tileheight > 0 and overlaprows >= 0 and overlapcols >= 0, "Invalid input"
        (imwidth, imheight) = (self.width(), self.height())
        assert imwidth >= tilewidth-overlapcols and imheight >= tileheight-overlaprows, "Invalid input"

        # Stride between the upper left corners of neighboring tiles
        (xstride, ystride) = (tilewidth-overlapcols, tileheight-overlaprows)
        bboxes = []
        for i in range(0, imwidth-overlapcols, xstride):
            for j in range(0, imheight-overlaprows, ystride):
                bboxes.append(BoundingBox(xmin=i, ymin=j, width=min(tilewidth, imwidth-i), height=min(tileheight, imheight-j)))

        tiles = []
        for bb in bboxes:
            tiles.append(self.clone(shallow=True, attributes=True).setattribute('tile', {'crop':bb, 'shape':self.shape()}).crop(bb))
        return tiles

    def union(self, other):
        """No-op for `vipy.image.Image`.  The other image is ignored and this object is returned unchanged."""
        return self
    
    @classmethod
    def untile(cls, imlist):
        """Undo an image tiling and recreate the original image.

        ```python
        tiles = im.tile(im.width()//2, im.height()//2, 0, 0)
        imdst = vipy.image.Image.untile(tiles)
        assert imdst == im
        ```

        Args:
            imlist: this must be the output of `vipy.image.Image.tile`
        
        Returns:
            A  new `vipy.image.Image` object reconstructed from the tiling, such that this is equivalent to the input to `vipy.image.Image.tile`
        
        .. note:: 
            - All annotations are updated properly for each tile, when the source image is `vipy.image.Scene`
            - The reconstruction buffer is allocated as uint8
            - The images in imlist are modified in place during reconstruction
        """
        assert all([isinstance(im, vipy.image.Image) and im.hasattribute('tile') for im in imlist]), "invalid image tile list"        
        imc = None
        for im in imlist:
            if imc is None:
                # Allocate the output once, as zeros with the shape of the original image recorded by tile() and the channels of the first tile
                imc = im.clone(shallow=True).array(np.zeros( (im.attributes['tile']['shape'][0], im.attributes['tile']['shape'][1], im.channels()), dtype=np.uint8))                
            # Crop the tile pixels using the tile crop box shifted to the origin, then write them into the output at the original crop location
            imc = imc.splat(im.array(im.attributes['tile']['crop'].clone().to_origin().int().crop(im.array())), im.attributes['tile']['crop'])
            if hasattr(im, 'objectmap'):
                # Apply set_origin with the tile crop box to every object, before merging the objects into the output
                im.objectmap(lambda o: o.set_origin(im.attributes['tile']['crop']))  # FIXME: only for Scene()
            imc = imc.union(im)
        return imc
    
    def uncrop(self, bb, shape):
        """Uncrop using provided bounding box and zeropad to shape=(Height, Width).

        An uncrop is the inverse operation for a crop, which preserves the cropped portion of the image in the correct location and replaces the rest with zeros out to shape.
    
        ```python
        im = vipy.image.RandomImage(128, 128)
        bb = vipy.geometry.BoundingBox(xmin=0, ymin=0, width=64, height=64)
        uncrop = im.crop(bb).uncrop(bb, shape=(128,128))
        ```

        Args:
            bb: [`vipy.geometry.BoundingBox`] the bounding box used to crop the image in self
            shape: [tuple] (height, width) of the uncropped image
    
        Returns:
            this `vipy.image.Image` object with the pixels uncropped.

        .. note:: NOT idempotent.  This will generate different results if run more than once.
        """
        ((x,y,w,h), (H,W)) = (bb.xywh(), shape)
        ((dyb, dya), (dxb, dxa)) = ((int(y), int(H-(y+h))), (int(x), int(W-(x+w))))
        self._array = np.pad(self.load().array(),
                             ((dyb, dya), (dxb, dxa), (0, 0)) if
                             self.load().array().ndim == 3 else ((dyb, dya), (dxb, dxa)),
                             mode='constant')        
        return self

    def splat(self, im, bb):
        """Replace pixels within boundingbox in self with pixels in im.

        Args:
            im: [`vipy.image.Image`] the source of the pixels.  If im has the same width and height as bb, then all of im is copied, otherwise the region of im at the location of bb is copied.
            bb: the bounding box in self to overwrite

        Returns:
            This object, with the pixel buffer modified in place
        """
        assert isinstance(im, vipy.image.Image), "invalid image"
        same_size = (im.width() == bb.width() and im.height() == bb.height())
        assert same_size or bb.isinterior(im.width(), im.height()) and bb.isinterior(self.width(), self.height()), "Invalid bounding box '%s'" % str(bb)
        (x,y,w,h) = bb.xywh()
        (rows, cols) = (slice(int(y), int(y+h)), slice(int(x), int(x+w)))
        if same_size:
            self._array[rows, cols] = im.array()
        else:
            self._array[rows, cols] = im.array()[rows, cols]
        return self
        
    def store(self):
        """Store the current image file as an attribute of this object.  Useful for archiving an object to be fully self contained without any external references.

           -Remove this stored image using unstore()
           -Unpack this stored image and set up the filename using restore()
           -This method is more efficient than load() followed by pkl(), as it stores the encoded image as a byte string.
           -Useful for creating a single self contained object for distributed processing.

        ```python
        v == v.store().restore(v.filename())
        ```

        Returns:
            This object, with the raw bytes of the image file saved in the '__image__' attribute

        """
        assert self.hasfilename(), "Image file not found"
        with open(self.filename(), 'rb') as f:
            encoded = f.read()
        self.attributes['__image__'] = encoded
        return self

    def unstore(self):
        """Delete the currently stored image from store(), by removing the '__image__' attribute"""
        return self.delattribute('__image__')

    def restore(self, filename):
        """Save the currently stored image to filename, and set up filename.

        Args:
            filename: [str] the path to write the bytes previously saved by store()

        Returns:
            The result of setting the filename of this object to the provided filename
        """
        assert self.hasattribute('__image__'), "Image not stored"
        encoded = self.attributes['__image__']
        with open(filename, 'wb') as f:
            f.write(encoded)
        return self.filename(filename)
    
    def abspath(self):
        """Change the path of the filename from a relative path to an absolute path (not relocatable)"""
        return self.filename(os.path.normpath(os.path.abspath(os.path.expanduser(self.filename()))))

    def relpath(self, parent=None):
        """Replace the filename with a relative path to parent (or current working directory if none)"""
        parent = parent if parent is not None else os.getcwd()
        assert parent in os.path.expanduser(self.filename()), "Parent path '%s' not found in abspath '%s'" % (parent, self.filename())
        return self.filename(PurePath(os.path.expanduser(self.filename())).relative_to(parent))

    def canload(self):
        """Return True if the image can be loaded successfully, useful for filtering bad links or corrupt images.

        An image that is already loaded can always be loaded.  Otherwise, an existing image file is verified with PIL without decoding the pixels, and anything else is loaded and then flushed.

        .. note:: KeyboardInterrupt and SystemExit are not treated as a load failure, they are propagated to the caller.
        """
        if self.isloaded():
            return True

        try:
            if isimagefile(self._filename) and os.path.exists(self._filename):
                # faster, throws exception on corrupted image.  The file handle is closed on exit.
                with PIL.Image.open(self._filename) as img:
                    img.verify()
            else:
                self.load().flush()  # fallback, load it and flush to avoid memory leak (expensive)
            return True
        except Exception:
            return False
        
    def dict(self):
        """Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding"""
        return {k.lstrip('_'):getattr(self, k) for k in Image.__slots__}  # prettyjson (remove "_" prefix to attributes)                                    

    def json(self, encode=True):
        """Export this image as JSON.

        Args:
            encode: [bool] if True, return a JSON encoded string, otherwise return the dictionary prior to encoding

        Returns:
            The serialized attributes from dict() with all empty (None) values removed.  A loaded pixel array is exported as a nested list.

        Raises:
            ValueError: if the attributes dictionary cannot be serialized to JSON
        """
        if not vipy.util.is_jsonable(self.attributes):
            raise ValueError('attributes dictionary contains non-json elements and cannot be serialized.  Try self.clear_attributes() or self.sanitize()')

        d = {}
        for (k, v) in self.dict().items():
            if v is not None:  # filter empty
                d[k] = v

        if d.get('array') is not None:
            if self.hasfilename() or self.hasurl():
                log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() first, then reload the image from backing filename/url after json import')
            d['array'] = self._array.tolist()

        if encode:
            return json.dumps(d)
        return d
        
    def loader(self, f, x=None):
        """Lambda function to load an unsupported image filename to a numpy array.
        
        This lambda function will be executed during load and the result will be stored in self._array
        """
        self._loader = (f, x if x is not None else self.filename()) if f is not None else None
        return self

    @staticmethod
    def bytes_array_loader(x):
        """Load from a bytes array.

        Args:
            x: [bytes] the contents of an encoded image file that can be opened by PIL

        Returns:
            The image decoded by PIL and converted to a numpy array
        """
        buf = io.BytesIO(x)
        return np.array(PIL.Image.open(buf))
    
    @staticmethod    
    def PIL_loader(x):
        """Load from a PIL image file object, by converting x to a numpy array"""
        return np.array(x)

    def has_loader(self):
        """Return True if a custom loader has been set using loader()"""
        return self._loader is not None

    
    def load(self, verbose=False):
        """Load image to cached private '_array' attribute.

        The source of the pixels is selected in this order: a previously loaded array (no-op), a custom loader,
        an image file with a standard image extension, a webp file, a file without an extension, or a previously recorded '__shape' attribute.

        Args:
            verbose: [bool] If true, show additional useful printed output

        Returns:
            This `vipy.image.Image` object with the pixels loaded in self._array as a numpy array.

        Raises:
            ValueError: if the filename has a non-image extension and no custom loader is set, or if there is nothing to load from
            Exception: any error raised while loading is re-raised after resetting the pixel array to None

        .. note:: This loader supports any image file format supported by PIL.  A custom loader can be added using `vipy.image.Image.loader`.
        """
        try:
            # Return if previously loaded image
            if self._array is not None:
                return self

            # Download URL to filename 
            if self._url is not None and not self.hasfilename():
                self.download(verbose=verbose)

            # Load filename to numpy array
            if self._loader is not None:
                # Custom loader takes precedence: call f(x) as registered by loader(), then infer the colorspace from the result
                (f,x) = self._loader
                self._array = f(x)
                if self.isluminance():
                    self.colorspace('lum')
                elif self.iscolor():
                    self.colorspace('rgb')
                else:
                    # Anything else is coerced to the general float colorspace
                    self._array = np.float32(self._array)
                    self.colorspace('float')

            elif isimagefile(self._filename):
                self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
                if self.istransparent():
                    self.colorspace('rgba')  # must be before iscolor()
                elif self.iscolor():
                    self.colorspace('rgb')
                elif self.isgrey():
                    self.colorspace('grey')
                elif self.isluminance():
                    self.colorspace('lum')
                else:
                    log.warning('unknown colorspace for image "%s" - attempting to coerce to colorspace=float' % str(self._filename))
                    self._array = np.float32(self._array)
                    self.colorspace('float')
            elif iswebp(self._filename):
                # NOTE(review): this returns the loaded vipy.video.Video and not self, and does not set self._array - confirm this is intended
                import vipy.video
                return vipy.video.Video(self._filename).load()
            elif self.hasfilename() and hasextension(self._filename):
                raise ValueError('Non-standard image extensions require a custom loader')
            elif self.hasfilename():
                # Attempting to open it anyway, may be an image file without an extension. Cross your fingers ...
                # The colorspace is not updated in this branch
                self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
            elif not self.hasfilename() and self.hasattribute('__shape'):
                # Loading a previously flushed buffer, load zeros so that we can display superclass objects
                self._array = np.zeros( self.getattribute('__shape') )
                self.delattribute('__shape')
            else:
                raise ValueError('image file not defined')
            
        except IOError:
            # Leave the object unloaded on failure, and let the caller handle the error
            if verbose is True:
                log.error('IO error loading "%s" ' % self.filename())
            self._array = None
            raise

        except KeyboardInterrupt:
            # Propagate immediately, without resetting the pixel array
            raise

        except Exception:
            # Leave the object unloaded on failure, and let the caller handle the error
            if verbose is True:
                log.error('Load error for image "%s"' % self.filename())
            self._array = None
            raise

        return self

    def download(self, timeout=10, verbose=False, cached=False):
        """Download URL to filename provided by constructor, or to temp filename.

        Args:
            timeout: [int]  The timeout in seconds for an http or https connection attempt.  See also [urllib.request.urlopen](https://docs.python.org/3/library/urllib.request.html).
            verbose: [bool] If true, output more helpful message.
            cached: [bool] If true, use the cached previously downloaded file (if it exists)

        Returns:
            This `vipy.image.Image` object with the URL downloaded to `vipy.image.Image.filename` or to a `vipy.util.tempimage` filename which can be retrieved with `vipy.image.Image.filename`.
        """
        if self._url is None and self._filename is not None:
            return self
        if self._url is None or not isurl(str(self._url)):
            raise ValueError('[vipy.image.download][ERROR]: '
                             'Invalid URL "%s" ' % self._url)

        if self._filename is None:
            if vipy.globals.cache() is not None:
                # There is a potential race condition here when downloading files with common names like "main.jpg", add a (repeatable, hashed) 3 character subdir (<=4096 subdirs for ext3, max ~32K)
                self._filename = os.path.join(remkdir(vipy.globals.cache()), stringhash(self._url, 3), filetail(self._url.split('?')[0]))  # preserve image filename from url
                self._filename = self._filename+'.jpg' if not has_image_extension(self._filename) else self._filename  # guess JPG for URLs with no file extension (e.g. php)
            elif isimageurl(self._url):
                self._filename = tempimage(fileext(self._url))
            else:
                self._filename = tempjpg()  # guess JPG for URLs with no file extension

        if cached and self.hasfilename():
            return self
            
        try:
            url_scheme = urllib.parse.urlparse(self._url)[0]
            if url_scheme in ['http', 'https']:
                vipy.downloader.download(self._url,
                                         self._filename,
                                         verbose=verbose,
                                         progress=False,
                                         timeout=timeout,
                                         sha1=self.getattribute('url_sha1'),
                                         username=self.getattribute('url_username'),
                                         password=self.getattribute('url_password'))
            elif url_scheme == 'file':
                shutil.copyfile(self._url, self._filename)
            elif url_scheme == 's3':
                raise NotImplementedError('see vipy.downloader.s3()')                
            else:
                raise NotImplementedError(
                    'Invalid URL scheme "%s" for URL "%s"' %
                    (url_scheme, self._url))

        except (httplib.BadStatusLine,
                urllib.error.URLError,
                urllib.error.HTTPError):
            if verbose is True:
                log.error('download failed for url "%s"' % self._url)
            self._array = None
            raise

        except IOError:
            if verbose:
                log.error('IO error downloading "%s" -> "%s" ' % (self.url(), self.filename()))
            self._array = None
            raise

        except KeyboardInterrupt:
            raise

        except Exception:
            if verbose:
                log.error('load error for image "%s"' % self.filename())
            self._array = None
            raise

        return self

    def reload(self):
        """Flush the image buffer to force reloading from file or URL"""
        return self.clone(flush=True).load()

    def isloaded(self):
        """Return True if `vipy.image.Image.load` was successful in reading the image, or if the pixels are present in `vipy.image.Image.array`."""
        return self._array is not None

    def loaded(self):
        """Alias for `vipy.image.Image.isloaded`"""
        return self._array is not None

    def is_loaded(self):
        """Alias for `vipy.image.Image.isloaded`"""
        return self._array is not None
    
    def isdownloaded(self):
        """Does the filename returned from `vipy.image.Image.filename` exist, meaning that the url has been downloaded to a local file?"""
        return self._filename is not None and os.path.exists(self._filename)

    def is_downloaded(self):
        """Alias for ``vipy.image.Image.isdownloaded`"""
        return self.isdownloaded()
    
    def downloadif(self, timeout=10, verbose=False):
        """Download URL to filename if the filename has not already been downloaded"""
        return self.download(timeout=timeout, verbose=verbose, cached=True) if self.hasurl() else self

    def try_download(self, timeout=10, verbose=False):
        """Attempt to download URL to filename if the filename has not already been downloaded, return object on failure.  Check `vipy.image.Image.is_downloaded` on returned object for success"""
        try:
            return self.downloadif(timeout=timeout, verbose=verbose)
        except:
            return self

    def try_load(self):
        """Attempt to load an image, return the object on failure.  Check `vipy.image.Image.is_loaded` on returned object for success"""
        try:
            return self.load()
        except:
            return self
        
    def channels(self):
        """Return integer number of color channels"""
        return self.load().channels() if not self.isloaded() else (1 if self._array.ndim==2 else self._array.shape[2])

    def iscolor(self):
        """Color images are three channel or four channel with transparency, float32 or uint8"""
        return self.channels() == 3 or self.channels() == 4

    def istransparent(self):
        """Transparent images are four channel color images with transparency, float32 or uint8.  Return true if this image contains an alpha transparency channel"""
        return self.channels() == 4

    def blend(self, im, alpha):
        """Alpha blend self and im in-place, such that self = alpha*self + (1-alpha)*im.

        The other image is cloned, loaded, converted to the colorspace of this image and resized like this image before blending.  The blended result is cast to uint8.
        """
        assert isinstance(im, Image)
        assert alpha >=0 and alpha <= 1
        assert self.colorspace() not in ['float','rgba','bgra'], "convert to rgb first"

        def _mix(arr):
            foreground = alpha * arr
            background = im.clone().load()._to_colorspace(self.colorspace()).resize_like(self).array()
            return np.uint8(foreground + (1-alpha)*background)

        return self.load().map(_mix)
                
    def isgrey(self):
        """Grey images are one channel, float32"""
        return self.channels() == 1 and self.array().dtype == np.float32

    def isluminance(self):
        """Luninance images are one channel, uint8"""
        return self.channels() == 1 and self.array().dtype == np.uint8

    def filesize(self):
        """Return size of underlying image file, requires fetching metadata from filesystem"""
        assert self.hasfilename(), 'Invalid image filename'
        return os.path.getsize(self._filename)

    def width(self):
        """Return the width (columns) of the image in integer pixels.
        
        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.load().array().shape[1]

    def height(self):
        """Return the height (rows) of the image in integer pixels.
        
        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """        
        return self.load().array().shape[0]
    
    def shape(self):
        """Return the (height, width) or equivalently (rows, cols) of the image.
        
        Returns:
            A tuple (height=int, width=int) of the image.

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return (self.load().height(), self.width())

    def aspectratio(self):
        """Return the aspect ratio of the image as (width/height) ratio.

        Returns:
            A float equivalent to (`vipy.image.Image.width` / `vipy.image.Image.height`)

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.load().width() / float(self.height())

    def area(self):
        """Return the area of the image as (width * height).

        Returns:
            An integer equivalent to (`vipy.image.Image.width` * `vipy.image.Image.height`)

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.width()*self.height()
    
    def centroid(self):
        """Return the real valued center pixel coordinates of the image (col=x,row=y).
        
        The centroid is equivalent to half the `vipy.image.Image.shape`.

        Returns:
            A tuple (column, row) of the floating point center of the image.
        """
        return (self.load().width() / 2.0, self.height() / 2.0)

    def centerpixel(self):
        """Return the integer valued center pixel coordinates of the image (col=i,row=j)

        The centerpixel is equivalent to half the `vipy.image.Image.shape` floored to the nearest integer pixel coordinate.

        Returns:
            A tuple (int(column), int(row)) of the integer center of the image.
        """
        c = np.round(self.centroid())
        return (int(c[0]), int(c[1]))
    
    def array(self, np_array=None, copy=False):
        """Replace self._array with provided numpy array

        Args:
            np_array: [numpy array] A new array to use as the pixel buffer for this image.
            copy: [bool] If true, copy the buffer using np.copy(), else use a reference to this buffer.

        Returns:
            - If np_array is not None, return the `vipy.image.Image` object such that this object points to the provided numpy array as the pixel buffer
            - If np_array is None, then return the numpy array.

        .. notes:: 
            - If copy=False, then this `vipy.image.Image` object will share the pixel buffer with the owner of np_array.  Changes to pixels in this buffer will be shared.  
            - If copy=True, then this will significantly slow down processing for large images.  Use referneces wherevery possible.
        """
        if np_array is None:
            return self._array if copy is False else np.copy(self._array)
        elif isnumpyarray(np_array):
            self._array = np.copy(np_array) if copy else np_array  # reference or copy
            assert self._array.dtype == np.float32 or self._array.dtype == np.uint8, "Invalid input - array() must be type uint8 or float32 and not type='%s'" % (str(self._array.dtype))                        
            self.colorspace(None)  # must be set with colorspace() after array() but before _to_colorspace()
            return self
        else:
            raise ValueError('Invalid input - array() must be numpy array and not "%s"' % (str(type(np_array))))

    def fromarray(self, data):
        """Alias for `vipy.image.Image.array` with copy=True. This will set new numpy array as the pixel buffer with a numpy array copy"""
        return self.array(data, copy=True)
    
    def tonumpy(self):
        """Alias for `vipy.image.Image.numpy"""
        return self.numpy()

    def numpy(self):
        """Return a mutable numpy array for this `vipy.image.Image`.

        .. notes:: 
            - This will always return a writeable array with the 'WRITEABLE' numpy flag set.  This is useful for returning a mutable numpy array as needed while keeping the original non-mutable numpy array (e.g. loaded from a video or PIL) as the underlying pixel buffer for efficiency reasons.
            - Triggers a `vipy.image.Image.load` if the pixel buffer has not been loaded
            - This will trigger a copy if the ['WRITEABLE' flag](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html) is not set. 
        """        
        self.load()
        self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array  # triggers copy         
        return self._array

    def channel(self, k=None):
        """Return a cloned Image() object for the kth channel, or return an iterator over channels if k=None.

        Iterate over channels as single channel luminance images:

        ```python
        for c in self.channel():
            print(c)
        ```

        Return the kth channel as a single channel luminance image:

        ```python
        c = self.channel(k=0)
        ```

        """
        if k is None:
            return [self.channel(j) for j in range(0, self.channels())]
        elif k == 0 and self.channels() == 1:
            return self
        else:
            assert k < self.channels() and k>=0, "Requested channel=%d must be within valid channels=%d" % (k, self.channels())
            im = self.clone().load()
            im._array = im._array[:,:,k]
            im._colorspace = 'lum'
            return im

    def channelmean(self):
        """Return a cloned Image() object for the mean of all channels followed by returning a single channel float image.

        This is useful for visualizing multichannel images by reducing the channels to one

        ```python
        vipy.image.Image(array=np.random.rand(3,3,16).astype(np.float32)).channelmean().mat2gray().lum().show()
        ```
        
        """
        im = self.clone().load()
        im._array = np.mean(im._array, axis=2, keepdims=True)
        im._colorspace = 'float'
        return im
        
    def red(self):
        """Return red channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':
        
        ```python
        self.red() == self.channel(0) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.red() == self.channel(3) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(0)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(3)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())

    def green(self):
        """Return green channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':

        ```python
        self.green() == self.channel(1) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.green() == self.channel(1) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be three channel color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(1)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(1)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())

    def blue(self):
        """Return blue channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':

        ```python
        self.vlue() == self.channel(2) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.blue() == self.channel(0) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be three channel color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(2)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(0)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())                

    def alpha(self):
        """Return alpha (transparency) channel as a cloned single channel `vipy.image.Image` object"""
        assert self.channels() == 4 and self.colorspace() in ['rgba', 'bgra'], "Must be four channnel color image"
        return self.channel(3)
        
    def zeros(self):
        """Set the pixel buffer to all zeros of the same shape and datatype as this `vipy.image.Image` object.
        
        These are equivalent operations for the resulting buffer shape: 
        
        ```python
        import numpy as np
        np.zeros( (self.width(), self.height(), self.channels()) ) == self.zeros().array()
        ```

        Returns:
           This `vipy.image.Image` object.

        .. note:: Triggers load() if the pixel buffer has not been loaded yet.
        """
        self._array = 0*self.load()._array
        return self

    def pil(self):
        """Convert vipy.image.Image to PIL Image.
        
        Returns:
            A [PIL image](https://pillow.readthedocs.io/en/stable/reference/Image.html) object, that shares the pixel buffer by reference
        """
        if self.isloaded():
            assert self.channels() in [1,3,4] and (self.channels() == 1 or self.colorspace() != 'float'), "Incompatible with PIL"
            return PIL.Image.fromarray(self.numpy(), mode='RGB' if self.colorspace()=='rgb' else None)  # FIXME: mode='RGB' triggers slow tobytes() conversion, need RGBA or RGBX
        elif self.hasfilename():
            return PIL.Image.open(self.filename())
        else:
            return None
            
    def blur(self, sigma=3):
        """Apply a Gaussian blur with Gaussian kernel radius=sigma to the pixel buffer.
        
        Args:
            sigma: [float >=0] The gaussian blur kernel radius.

        Returns:
            This `vipy.image.Image` object with the pixel buffer blurred in place.
        """
        assert sigma >= 0
        return self.array(np.array(self.pil().filter(PIL.ImageFilter.GaussianBlur(radius=sigma)))) if sigma>0 else self
        
    def torch(self, order='CHW'):
        """Convert the batch of 1 HxWxC images to a CxHxW torch tensor.

        Args:
            order: ['CHW', 'HWC', 'NCHW', 'NHWC'].  The axis order of the torch tensor (channels, height, width) or (height, width, channels) or (1, channels, height, width) or (1, height, width, channels)

        Returns:
            A CxHxW or HxWxC or 1xCxHxW or 1xHxWxC [torch tensor](https://pytorch.org/docs/stable/tensors.html) that shares the pixel buffer of this image object by reference.

        .. note:: This supports numpy types and does not support bfloat16
        """
        from torch import from_numpy;  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow)
        
        assert order in ['CHW', 'HWC', 'NCHW', 'NHWC']
        img = self.numpy() if self.array().ndim >= 3 else np.expand_dims(self.array(), 2)  # HxW -> HxWx1 
        
        if order in ['CHW']:
            assert img.ndim == 3, "invalid array"  
            img = img.transpose(2,0,1) # HxWxC -> CxHxW
        elif order in ['NCHW']:
            img = img.transpose(3,2,0,1) if img.ndim == 4 else np.expand_dims(img.transpose(2,0,1), 0)
        if order in ['NHWC']:
            img = img.transpose(3,0,1,2) if img.ndim == 4 else np.expand_dims(img, 0)
        return from_numpy(img)   # pip install torch

    
    @staticmethod
    def from_torch(x, order='CHW'):
        """Convert a 1xCxHxW, CxHxW or NxCxHxW torch tensor (or numpy array with torch channel order) to HxWxC numpy array, returns new `vipy.image.Image` with inferred colorspace corresponding to data type in x"""
        from torch import Tensor, is_tensor;  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow) 
        assert isinstance(x, Tensor) or isinstance(x, np.ndarray), "Invalid input type '%s'- must be torch.Tensor" % (str(type(x)))
        assert x.ndim == 4 or x.ndim == 3, "Torch tensor must be shape 1xCxHxW, CxHxW, or NxCxHxW"
        x = x.squeeze(0) if (x.ndim == 4 and x.shape[0] == 1) else x

        if order == 'CHW':
            x = x.permute(1,2,0).cpu().detach().float().numpy() if is_tensor(x) else np.copy(x).transpose(1,2,0)   # CxHxW -> HxWxC, copied            
        elif order == 'WHC':
            x = x.permute(1,0,2).cpu().detach().float().numpy() if is_tensor(x) else np.copy(x).transpose(1,0,2)   # WxHxC -> HxWxC, copied        
        elif order == 'HWC':
            x = x.cpu().detach().float().numpy() if is_tensor(x) else np.copy(x)  # HxWxC -> HxWxC, copied        
        elif order == 'NCHW':
            assert x.ndim == 4, "invalid shape"
            x = x.permute(2,3,1,0).cpu().detach().float().numpy()  # NxCxHxW -> HxWxCxN, copied        
        else:
            raise ValueError('unknown axis order "%s"' % order)

        img = x
        colorspace = 'float' if img.dtype == np.float32 else None
        colorspace = 'rgb' if img.dtype == np.uint8 and img.shape[2] == 3 else colorspace  # assumed
        colorspace = 'lum' if img.dtype == np.uint8 and img.shape[2] == 1 else colorspace
        return Image(array=img, colorspace=colorspace)

    @staticmethod
    def fromtorch(x, order='CHW'):
        """Alias for `vipy.image.Image.from_torch`"""
        return Image.from_torch(x, order)
    
    def unload(self):
        """Remove cached file and loaded array.  Note that this will delete the underlying file returned by filename() if there is a backing url, cleaning up cached files and forcing re-download"""
        if self.hasurl() and self.hasfilename():
            log.info('Removing "%s"'% self._filename)
            os.remove(self._filename)
            self._filename = None
        if self.isloaded():
            self.flush()
        return self

    def uncache(self):
        """Alias for `vipy.image.Image.unload`"""
        return self.unload()
    
    def filename(self, newfile=None):
        """Return or set image filename"""
        if newfile is None:
            return self._filename
        else:
            self._filename = newfile
            return self

    def clear_filename(self):
        """Remove the current filename from the object in-place and return the object"""        
        self._filename = None
        return self
    
    def url(self, url=None, username=None, password=None, sha1=None):
        """Image URL and URL download properties"""
        if url is not None:
            self._url = url  # this does not change anything else (e.g. the associated filename), better to use constructor 
        if username is not None:
            self.setattribute('url_username', username)
        if password is not None:
            self.setattribute('url_password', password)
        if sha1 is not None:
            self.setattribute('url_sha1', sha1)
        if url is None and username is None and password is None and sha1 is None:
            return self._url
        else:
            return self
    
    def colorspace(self, colorspace=None):
        """Return or set the colorspace as ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']"""
        if colorspace is None:
            return self._colorspace
        else:
            assert str(colorspace).lower() in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s'. Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum']" % colorspace 
            img = self.array()
            if self.isloaded():
                colorspace = str(colorspace).lower()
                if self.array().dtype == np.float32:
                    assert colorspace in ['float', 'grey', 'gray'], "Invalid colorspace '%s' for float32 array()" % colorspace
                elif self.array().dtype == np.uint8:
                    assert colorspace in  ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum'], "Invalid colorspace '%s' for uint8 array(). Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum']" % colorspace
                else:
                    raise ValueError('unupported array() datatype "%s".  Allowable is [np.float32, np.uint8]' % colorspace)  # should never get here as long as array() is used to set _array
                if self.channels() == 1:
                    assert colorspace in ['float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s; for single channel array.  Allowable is ['float', 'grey', 'gray', 'lum']" % colorspace
                elif self.channels() == 3:
                    assert colorspace in ['float', 'rgb', 'bgr', 'hsv'], "Invalid colorspace '%s; for three channel array. Allowable is ['float', 'rgb', 'bgr', 'hsv']" % colorspace
                elif self.channels() == 4:
                    assert colorspace in ['float', 'rgba', 'bgra'], "Invalid colorspace '%s; for four channel array. Allowable is ['float', 'rgba', 'bgra']" % colorspace                    
                elif colorspace != 'float':
                    raise ValueError("Invalid colorspace '%s' for image channels=%d, type=%s" % (colorspace, self.channels(), str(self.array().dtype)))
                if colorspace in ['grey', 'gray']:
                    assert self.max() <= 1 and self.min() >= 0, "Colorspace 'grey' image must be np.float32 in range [0,1].  Use colorspace 'lum' for np.uint8 in range [0,255], or colorspace 'float' for unconstrained np.float32 [-inf, +inf]"
                    colorspace = 'grey'  # standardize
            self._colorspace = str(colorspace).lower()
            return self

    def uri(self):
        """Return the URI of the image object, either the URL or the filename, raise exception if neither defined"""
        if self.hasurl():
            return self.url()
        elif self.hasfilename():
            return self.filename()
        else:
            raise ValueError('No URI defined')

    def set_attribute(self, key, value):
        """Set element self.attributes[key]=value"""
        if self.attributes is None:
            self.attributes = {key: value}
        else:
            self.attributes[key] = value
        return self
    
    def setattribute(self, key, value):
        return self.set_attribute(key, value)
        
    def setattributes(self, newattr):
        """Set many attributes at once by providing a dictionary to be merged with current attributes"""
        assert isinstance(newattr, dict), "New attributes must be dictionary"
        self.attributes.update(newattr)
        return self
    
    def getattribute(self, k):
        """Return the key k in the attributes dictionary (self.attributes) if present, else None"""        
        return self.get_attribute(k)

    def get_attribute(self, k):
        """Return the key k in the attributes dictionary (self.attributes) if present, else None"""        
        return self.attributes[k] if k in self.attributes else None        
    
    def clear_attributes(self):
        self.attributes = {}
        return self
    
    def hasattribute(self, key):
        return self.attributes is not None and key in self.attributes

    def delattribute(self, k):
        return self.del_attribute(k)
    
    def del_attribute(self, k):
        if k in self.attributes:
            self.attributes.pop(k)
        return self
        
    def delattributes(self, atts):
        """Remove each key in atts (a single key or a collection of keys, converted with tolist) from the attributes and return this object"""
        keys = tolist(atts)
        for key in keys:
            self.delattribute(key)
        return self

    def append_attribute(self, key, value):
        """Append the value to attribute key, creating the key as an empty list if it does not exist"""
        if key not in self.attributes:
            self.attributes[key] = []
        self.attributes[key].append(value)
        return self
    
    def metadata(self, k=None):
        """Return metadata associated with this image, stored in the attributes dictionary"""
        return self.attributes if k is None else self.getattribute(k)
    
    def hasurl(self):
        """synonym for `vipy.image.has_url`"""
        return self.has_url()

    def has_url(self):
        """Return True if the image has a URL input source"""
        return self._url is not None
    
    def has_filename(self):
        """Return True if the image has a filename input source and this file exists"""
        return self._filename is not None and os.path.exists(self._filename)

    def hasfilename(self):
        """synonym for has_filename"""
        return self.has_filename()
    
    def clone(self, flushforward=False, flushbackward=False, flush=False, shallow=False, attributes=False, dereference=False):
        """Create deep copy of object, flushing the original buffer if requested and returning the cloned object.
        Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned 
        object which can be used for encoding and will be garbage collected.
        
            * flushforward: copy the object, and set the cloned object array() to None.  This flushes the video buffer for the clone, not the object
            * flushbackward:  copy the object, and set the object array() to None.  This flushes the video buffer for the object, not the clone.
            * flush:  set the object array() to None and clone the object.  This flushes the video buffer for both the clone and the object.
            * dereference: remove both the filename and URL (if present) in the cloned object, leaving only the buffer
        """
        if flush or (flushforward and flushbackward):
            self.flush()  # flushes buffer on object and clone
            im = copy.deepcopy(self)  # object and clone are flushed
        elif flushbackward:
            im = copy.deepcopy(self)  # propagates _array to clone
            self.flush()  # object flushed, clone not flushed
        elif flushforward:            
            array = self._array;
            self._array = None
            im = copy.deepcopy(self)   # does not propagate _array to clone
            self._array = array    # object not flushed
            im.flush()
        elif shallow:
            im = copy.copy(self)  # shallow copy
            im._array = np.asarray(self._array) if self._array is not None else None  # shared pixels            
        else:
            im = copy.deepcopy(self)
        if attributes:
            im.attributes = copy.deepcopy(self.attributes)
        if dereference:
            assert im._array is not None, "image buffer required"
            im._filename = None
            im._url = None
        return im

    def flush(self):
        """flush the image buffer in place, alias for self.clone(flush=True)"""        
        if not (self.hasfilename() or self.hasurl()):
            self.setattribute('__shape', (self.height(), self.width(), self.channels()))  # to load zeros
        self._array = None  # flushes buffer on object
        return self

        
    # Spatial transformations
    def resize(self, cols=None, rows=None, width=None, height=None, interp='bilinear', fast=False):
        """Resize the image buffer to (rows x cols) with bilinear interpolation.  If rows or cols is provided, rescale image maintaining aspect ratio"""
        assert not (cols is not None and width is not None), "Define either width or cols"
        assert not (rows is not None and height is not None), "Define either height or rows"
        rows = rows if height is None else height
        cols = cols if width is None else width
        if cols is None or rows is None:
            if cols is None:
                scale = float(rows) / float(self.height())
            else:
                scale = float(cols) / float(self.width())
            self.rescale(scale)
        elif rows == self.height() and cols == self.width():
            return self  
        elif self.colorspace() == 'float':
            self._array = np.dstack([np.array(im.pil().resize((cols, rows), string_to_pil_interpolation(interp))) for im in self.channel()])
        else:
            self._array = np.asarray(self.load().pil().resize((cols, rows), string_to_pil_interpolation(interp), reducing_gap=2 if fast else None))  
        return self

    def resize_like(self, im, interp='bilinear'):
        """Resize image buffer to be the same size as the provided vipy.image.Image()"""
        assert isinstance(im, Image), "Invalid input - Must be vipy.image.Image"
        return self.resize(im.width(), im.height(), interp=interp)
    
    def rescale(self, scale=1, interp='bilinear', fast=False):
        """Scale the image buffer by the given factor - NOT idempotent"""
        (height, width) = self.load().shape()
        if scale == 1:
            return self
        elif self.colorspace() == 'float':
            self._array = np.dstack([np.asarray(im.pil().resize((int(np.round(scale * width)), int(np.round(scale * height))), string_to_pil_interpolation(interp))) for im in self.channel()])
        else: 
            self._array = np.asarray(self.pil().resize((int(np.round(scale * width)), int(np.round(scale * height))), string_to_pil_interpolation(interp), reducing_gap=2 if fast else None))
        return self

    def maxdim(self, dim=None, interp='bilinear'):
        """Resize image preserving aspect ratio so that maximum dimension of image = dim, or return maxdim()"""
        return self.rescale(float(dim) / float(np.maximum(self.height(), self.width())), interp=interp) if dim is not None else max(self.shape())

    def mindim(self, dim=None, interp='bilinear'):
        """Resize image preserving aspect ratio so that minimum dimension of image = dim, or return mindim()"""
        if dim is None:
            return np.minimum(self.height(), self.width())
        else:
            s = float(dim) / float(np.minimum(self.height(), self.width()))
            return self.rescale(s, interp=interp) if dim is not None else min(self.shape())

    def mindimn(self, dim=None):
        """Frequently used shortcut for mindim(dim, interp='nearest')"""
        return self.mindim(dim, interp='nearest')
    
    def _pad(self, dx, dy, mode='edge'):
        """Pad image using np.pad mode, dx=padwidth, dy=padheight, thin wrapper for numpy.pad"""
        self._array = np.pad(self.load().array(),
                             ((dy, dy), (dx, dx), (0, 0)) if
                             self.load().array().ndim == 3 else ((dy, dy), (dx, dx)),
                             mode=mode)
        return self

    def pad(self, padwidth, padheight):
        """Alias for `vipy.image.Image.zeropad`"""
        return self.zeropad(padwidth, padheight)
    
    def zeropad(self, padwidth, padheight):
        """Pad image using np.pad constant by adding padwidth on both left and right , or padwidth=(left,right) for different pre/postpadding,, and padheight on top and bottom or padheight=(top,bottom) for different pre/post padding"""
        if not isinstance(padwidth, tuple):
            padwidth = (padwidth, padwidth)
        if not isinstance(padheight, tuple):
            padheight = (padheight, padheight)
        if self.channels() > 1 or self._array.ndim == 3:
            pad_shape = (padheight, padwidth, (0, 0))
        else:
            pad_shape = (padheight, padwidth)

        assert all([x>=0 for x in padheight]) and all([x>=0 for x in padwidth]), "padding must be positive"
        if padwidth[0]>0 or padwidth[1]>0 or padheight[0]>0 or padheight[1]>0:
            self._array = np.pad(self.load().array(), pad_width=pad_shape, mode='constant', constant_values=0)  # this is still slow due to the required copy, but fast-ish in np >= 1.17
            
        return self

    def zeropadlike(self, width, height):
        """Zero pad the image balancing the border so that the resulting image size is (width, height)"""
        assert width >= self.width() and height >= self.height(), "Invalid input - final (width=%d, height=%d) must be greater than current image size (width=%d, height=%d)" % (width, height, self.width(), self.height())
        return self.zeropad( (int(np.floor((width - self.width())/2)), int(np.ceil((width - self.width())/2))),
                             (int(np.floor((height - self.height())/2)), int(np.ceil((height - self.height())/2))))
                            
    def meanpad(self, padwidth, padheight, mu=None):
        """Pad image using np.pad constant=image mean by adding padwidth on both left and right , or padwidth=(left,right) for different pre/postpadding,, and padheight on top and bottom or padheight=(top,bottom) for different pre/post padding"""        
        if not isinstance(padwidth, tuple):
            padwidth = (padwidth, padwidth)
        if not isinstance(padheight, tuple):
            padheight = (padheight, padheight)
        assert all([x>=0 for x in padheight]) and all([x>=0 for x in padwidth]), "padding must be positive"
        mu = self.meanchannel() if mu is None else mu
        self._array = np.squeeze(np.dstack([np.pad(img,
                                                   pad_width=(padheight,padwidth),
                                                   mode='constant',
                                                   constant_values=c) for (img,c) in zip(self.channel(), mu)]))
        return self

    def alphapad(self, padwidth, padheight):
        """Pad image using alpha transparency by adding padwidth on both left and right , or padwidth=(left,right) for different pre/postpadding,, and padheight on top and bottom or padheight=(top,bottom) for different pre/post padding"""
        assert self.colorspace() == 'rgba', "Colorspace must be RGBA for padding with transparency"
        return self.meanpad(padwidth, padheight, mu=np.array([0,0,0,0]))
    
    def minsquare(self):
        """Crop image of size (HxW) to (min(H,W), min(H,W)), keeping the upper left corner constant"""
        side = int(np.min(self.load().shape()))
        square = BoundingBox(xmin=0, ymin=0, width=side, height=side)
        return self._crop(square)

    def maxsquare(self, S=None):
        """Crop image of size (HxW) to (max(H,W), max(H,W)) with zeropadding or (S,S) if provided, keeping upper left corner constant"""
        S = np.max(self.load().shape()) if S is None else int(S)
        (H, W) = self.shape()
        (dW, dH) = (max(0, S - W), max(0, S - H))
        if S != W or S != H:
            self._crop(BoundingBox(0, 0, width=min(W, S), height=min(H, S)))
            if (dW > 0 or dH > 0):
                self.zeropad((0,dW), (0,dH))  # crop then zeropad
        return self

    def maxmatte(self):
        """Pad image of size (HxW) to (max(H,W), max(H,W)) with balanced zeropadding forming a letterbox with top/bottom matte or pillarbox with left/right matte.

        When the size difference is odd, the extra pixel goes to the right/bottom so that the result is exactly square.
        """
        S = np.max(self.load().shape())
        dW = S - self.width()
        dH = S - self.height()
        # True division here: integer division before floor/ceil would drop the odd pixel
        padwidth = (int(np.floor(dW / 2)), int(np.ceil(dW / 2)))
        padheight = (int(np.floor(dH / 2)), int(np.ceil(dH / 2)))
        return self.zeropad(padwidth, padheight)._crop(BoundingBox(0, 0, width=int(S), height=int(S)))
    
    def centersquare(self):
        """Crop the centered (NxN) square with N=min(width,height), keeping the image centroid constant"""
        N = int(np.min(self.shape()))
        (xc, yc) = (float(self.width() / 2.0), float(self.height() / 2.0))
        return self._crop(BoundingBox(xcentroid=xc, ycentroid=yc, width=N, height=N))

    def centercrop(self, height, width):
        """Crop a (height x width) region centered on the image, keeping the image centroid constant"""
        (xc, yc) = (float(self.width() / 2.0), float(self.height() / 2.0))
        box = BoundingBox(xcentroid=xc, ycentroid=yc, width=int(width), height=int(height))
        return self._crop(box)

    def cornercrop(self, height, width):
        """Crop a (height x width) region anchored at the upper left corner"""
        box = BoundingBox(xmin=0, ymin=0, width=int(width), height=int(height))
        return self._crop(box)
    
    def _crop(self, bbox):
        """Crop the image buffer to the supplied bounding box, after clipping the box to the image rectangle.

        A degenerate box, or one that does not overlap the image, leaves the buffer unchanged and logs a warning.
        """
        assert isinstance(bbox, BoundingBox) and bbox.valid(), "Invalid input - Must be vipy.geometry.BoundingBox not '%s'" % (str(type(bbox)))
        if bbox.isdegenerate() or not bbox.hasoverlap(self.load().array()):
            log.warning('BoundingBox for crop() does not intersect image rectangle')
            return self

        bbox = bbox.imclip(self.load().array()).int()
        img = self.array()
        self._array = img[bbox.ymin():bbox.ymax(), bbox.xmin():bbox.xmax()]
        return self

    def crop(self, bbox):
        return self._crop(bbox)
    
    def fliplr(self):
        """Mirror the image buffer about the vertical axis - Not idempotent"""
        self._array = np.fliplr(self.load().array())
        return self

    def flipud(self):
        """Mirror the image buffer about the horizontal axis - Not idempotent"""
        self._array = np.flipud(self.load().array())
        return self
    
    def imagebox(self):
        """Return the bounding box covering the full image rectangle, anchored at the origin"""
        (w, h) = (int(self.width()), int(self.height()))
        return BoundingBox(xmin=0, ymin=0, width=w, height=h)

    def border_mask(self, pad):
        """Return a binary uint8 image the same size as self, with a border of pad pixels in width or height around the edge"""
        img = np.zeros( (self.height(), self.width()), dtype=np.uint8)
        img[0:pad,:] = 1
        img[-pad:,:] = 1
        img[:,0:pad] = 1
        img[:,-pad:] = 1
        return img
    
    # Color conversion
    def _to_colorspace(self, to):
        """Supported colorspaces are rgb, rgba, bgr, bgra, hsv, grey, lum, float"""
        to = to if to != 'gray' else 'grey'  # standardize 'gray' -> 'grey' internally
        self.load()
        if self.colorspace() == to:
            return self
        elif to == 'float':
            img = self.load().array()  # any type
            self._array = np.array(img).astype(np.float32)  # typecast to float32
        elif self.colorspace() == 'lum':
            img = self.load().array()  # single channel, uint8 [0,255]
            assert img.dtype == np.uint8
            img = np.squeeze(img, axis=2) if img.ndim == 3 and img.shape[2] == 1 else img  # remove singleton channel            
            self._array = np.array(PIL.Image.fromarray(img, mode='L').convert('RGB'))  # uint8 luminance [0,255] -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() in ['gray', 'grey']:
            img = self.load().array()  # single channel float32 [0,1]
            img = np.squeeze(img, axis=2) if img.ndim == 3 and img.shape[2] == 1 else img  # remove singleton channel                        
            self._array = np.array(PIL.Image.fromarray(255.0 * img, mode='F').convert('RGB'))  # float32 gray [0,1] -> float32 gray [0,255] -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'rgba':
            img = self.load().array()  # uint8 RGBA
            if to == 'bgra':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGBA -> uint8 ABGR
                self._array = self._array[:,:,[1,2,3,0]]  # uint8 ABGR -> uint8 BGRA
            elif to == 'rgb':
                self._array = self._array[:,:,0:-1]  # uint8 RGBA -> uint8 RGB
            else:
                self._array = self._array[:,:,0:-1]  # uint8 RGBA -> uint8 RGB
                self.colorspace('rgb')
                self._to_colorspace(to)
        elif self.colorspace() == 'rgb':
            img = self.load().array()  # uint8 RGB
            if to in ['grey', 'gray']:
                self._array = (1.0 / 255.0) * np.array(PIL.Image.fromarray(img).convert('L')).astype(np.float32)  # uint8 RGB -> float32 Grey [0,255] -> float32 Grey [0,1]
            elif to == 'bgr':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGB -> uint8 BGR
            elif to == 'hsv':
                self._array = np.array(PIL.Image.fromarray(img).convert('HSV'))  # uint8 RGB -> uint8 HSV
            elif to == 'lum':
                self._array = np.array(PIL.Image.fromarray(img).convert('L'))  # uint8 RGB -> uint8 Luminance (integer grey)
            elif to == 'rgba':
                self._array = np.dstack((img, 255*np.ones((img.shape[0], img.shape[1]), dtype=np.uint8)))
            elif to == 'bgra':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGB -> uint8 BGR
                self._array = np.dstack((self._array, np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)))  # uint8 BGR -> uint8 BGRA
        elif self.colorspace() == 'bgr':
            img = self.load().array()  # uint8 BGR
            self._array = np.array(img)[:,:,::-1]  # uint8 BGR -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'bgra':
            img = self.load().array()  # uint8 BGRA
            self._array = np.array(img)[:,:,::-1]  # uint8 BGRA -> uint8 ARGB
            self._array = self._array[:,:,[1,2,3,0]]  # uint8 ARGB -> uint8 RGBA
            self.colorspace('rgba')
            self._to_colorspace(to)
        elif self.colorspace() == 'hsv':
            img = self.load().array()  # uint8 HSV
            self._array = np.array(PIL.Image.fromarray(img, mode='HSV').convert('RGB'))  # uint8 HSV -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'float':
            img = self.load().array()  # float32
            if np.max(img) > 1 or np.min(img) < 0:
                #log.warning('Converting float image to "%s" will be rescaled with self.mat2gray() into the range float32 [0,1]' % to)
                img = self.mat2gray().array()
            if not self.channels() in [1,2,3]:
                raise ValueError('Float image must be single channel or three channel RGB in the range float32 [0,1] prior to conversion')
            if self.channels() == 3:  # assumed RGB
                self._array = np.uint8(255 * self.array())   # float32 RGB [0,1] -> uint8 RGB [0,255]
                self.colorspace('rgb')
            else:
                img = np.squeeze(img, axis=2) if img.ndim == 3 else img
                self._array = (1.0 / 255.0) * np.array(PIL.Image.fromarray(np.uint8(255 * img)).convert('L')).astype(np.float32)  # float32 RGB [0,1] -> float32 gray [0,1]                
                self.colorspace('grey')
            self._to_colorspace(to)
        elif self.colorspace() is None:
            raise ValueError('Colorspace must be initialized by constructor or colorspace() to allow for colorspace conversion')
        else:
            raise ValueError('unsupported colorspace "%s"' % self.colorspace())

        self.colorspace(to)
        return self

    def affine_transform(self, A, border='zero'):
        """Apply a 3x3 affine geometric transformation to the image. 

        Args:        
            - A [np.ndarray]: 3x3 affine geometric transform from `vipy.geometry.affine_transform`
            - border [str]:  'zero' or 'replicate' to handle elements outside the image rectangle after transformation

        Returns:
            - This object with only the array transformed

        .. note:: The image will be loaded and converted to float() prior to applying the affine transformation.  
        .. note:: This will transform only the pixels, not objects
        """
        assert isnumpy(A), "invalid input"  # A must be a numpy array, checked before reading A.shape
        assert A.shape == (3,3), "The affine transformation matrix should be the output of vipy.geometry.affine_transformation"
        self._array = vipy.geometry.imtransform(self.load().float().array(), A.astype(np.float32), border=border)
        return self

    def rotate(self, r):
        """Rotate the pixels by r radians, with the origin in the upper left"""
        A = vipy.geometry.affine_transform(r=r)
        return self.affine_transform(A)

    def rotate_by_exif(self):
        """Apply a rotation as specified in the 'Orientation' field EXIF metadata"""
        exif = self.exif()
        orientation = exif['Orientation'] if 'Orientation' in exif else None
        if orientation is None or orientation == 1:
            return self
        elif orientation == 2:
            return self.fliplr()
        elif orientation == 3:
            return self.flipud().fliplr()
        elif orientation == 4:
            return self.flipud()
        elif orientation == 5:
            return self.rot90cw().fliplr()
        elif orientation == 6:
            return self.rot90cw()
        elif orientation == 7:
            return self.rot90ccw().fliplr()
        elif orientation == 8:
            return self.rot90ccw()
        else:
            raise ValueError                        
    
    def rgb(self):
        """Convert the image buffer to three channel RGB uint8 colorspace"""
        return self._to_colorspace('rgb')

    def color_transform(self, colorspace):
        """Transform the image buffer from the current `vipy.image.Image.colorspace` to the provided colorspace"""
        return self._to_colorspace(colorspace)
    
    def colorspace_like(self, im):
        """Convert the image buffer to the colorspace of the provided vipy.image.Image"""
        assert isinstance(im, vipy.image.Image)
        target = im.colorspace()
        return self._to_colorspace(target)
    
    def rgba(self):
        """Convert the image buffer to four channel RGBA uint8 colorspace"""
        return self._to_colorspace('rgba')

    def hsv(self):
        """Convert the image buffer to three channel HSV uint8 colorspace"""
        return self._to_colorspace('hsv')

    def bgr(self):
        """Convert the image buffer to three channel BGR uint8 colorspace"""
        return self._to_colorspace('bgr')

    def bgra(self):
        """Convert the image buffer to four channel BGR uint8 colorspace"""
        return self._to_colorspace('bgra')

    def float(self):
        """Convert the image buffer to float32"""
        return self._to_colorspace('float')

    def greyscale(self):
        """Convert the image buffer to single channel grayscale float32 in range [0,1]"""
        return self._to_colorspace('gray')

    def grayscale(self):
        """Alias for greyscale()"""
        return self.greyscale()

    def grey(self):
        """Alias for greyscale()"""
        return self.greyscale()

    def gray(self):
        """Alias for greyscale()"""
        return self.greyscale()

    def luminance(self):
        """Convert the image buffer to single channel uint8 in range [0,255] corresponding to the luminance component"""
        return self._to_colorspace('lum')

    def lum(self):
        """Alias for luminance()"""
        return self._to_colorspace('lum')

    def _apply_colormap(self, cm):
        """Convert the image to greyscale, then map it to an RGB image with the named matplotlib colormap.

        See: https://matplotlib.org/tutorials/colors/colormaps.html
        """
        colormap = plt.get_cmap(cm)
        grey = self.grey().numpy()
        mapped = colormap(grey)
        self._array = np.uint8(255 * mapped[:,:,:3])  # keep the first three channels, scale to [0,255]
        self.colorspace('rgb')
        return self

    def jet(self):
        """Apply jet colormap to greyscale image and save as RGB"""
        return self._apply_colormap('jet')

    def rainbow(self):
        """Apply rainbow colormap to greyscale image and convert to RGB"""
        return self._apply_colormap('gist_rainbow')

    def hot(self):
        """Apply hot colormap to greyscale image and convert to RGB"""
        return self._apply_colormap('hot')

    def bone(self):
        """Apply bone colormap to greyscale image and convert to RGB"""
        return self._apply_colormap('bone')

    def saturate(self, min, max):
        """Saturate the image buffer to be clipped between [min,max], types of min/max are specified by _array type"""
        return self.array(np.minimum(np.maximum(self.load().array(), min), max))

    def intensity(self):
        """Convert image to float32 with [min,max] to range [0,1], force colormap to be 'float'.  Equivalent to self.mat2gray()"""
        self.array((self.load().float().array()) - float(self.min()) / float(self.max() - self.min()))
        return self.colorspace('float')

    def mat2gray(self, min=None, max=None):
        """Convert the image buffer so that [min,max] -> [0,1], forces conversion to 'float' colorspace.  This does not change the number of color channels"""
        img = np.float32(self.load().float().array())
        self.array(mat2gray(img, min, max))  # module level mat2gray from vipy.util, not this method
        return self.colorspace('float')

    def sum_to_one(self, eps=1E-6):
        """Return float image in the range [0,1] such that all elements sum to one"""
        return self.gain(1.0/(eps+self.mat2gray().sum()))
    
    def gain(self, g):
        """Elementwise multiply gain to image array, Gain should be broadcastable to array().  This forces the colospace to 'float'.  Don't use numba optimization, it is slower than native multiply"""
        #return self.array(vipy.math.gain(self.load()._array, np.float32(g))).colorspace('float') if g != 1 else self        
        #return self.array(np.float32(self.load()._array*g)).colorspace('float') if g != 1 else self  # numba not as fast anymore
        return self.array(np.multiply(self.load().float().array(), g)).colorspace('float') if g != 1 else self

    def bias(self, b):
        """Add a bias to the image array.  Bias should be broadcastable to array().  This forces the colorspace to 'float'"""
        self.array(self.load().float().array() + b)
        return self.colorspace('float')

    def normalize(self, gain, bias):
        """Apply a multiplicative gain g and additive bias b, such that self.array() == gain*self.array() + bias.

        This is useful for applying a normalization of an image prior to calling `vipy.image.Image.torch`.

        The following operations are equivalent.

        ```python
        im = vipy.image.RandomImage()
        im.normalize(1/255.0, 0.5) == im.gain(1/255.0).bias(-0.5)
        ```
        
        .. note:: This will force the colorspace to 'float'
        """
        return self.array(gain*self.load().float().array() + bias).colorspace('float')

    def additive_noise(self, hue=(-15,15), saturation=(-15,15), brightness=(-15,15)):
        """Apply uniform random additive noise in the given range to the given HSV color channels.  Image will be converted to HSV prior to applying noise."""
        assert isinstance(hue, tuple) and len(hue) == 2 and hue[1]>=hue[0]
        assert isinstance(saturation, tuple) and len(saturation) == 2 and saturation[1]>=saturation[0]
        assert isinstance(brightness, tuple) and len(brightness) == 2 and brightness[1]>=brightness[0]        
        
        (H,W,C) = (self.height(), self.width(), self.channels())
        noise = np.dstack(((hue[1]-hue[0])*np.random.rand(H,W)+hue[0],
                           (saturation[1]-saturation[0])*np.random.rand(H,W)+saturation[0],
                           (brightness[1]-brightness[0])*np.random.rand(H,W)+brightness[0]))
        return self.array( np.minimum(np.maximum(self.hsv().array() + noise, 0), 255).astype(np.uint8) )
            
    # Image statistics
    def stats(self):
        """Log summary statistics for this image: channels, shape, min, max, mean and per-channel mean"""
        log.info(self)
        log.info('  Channels: %d' % self.channels())
        summary = (('  Shape: %s', self.shape),
                   ('  min: %s', self.min),
                   ('  max: %s', self.max),
                   ('  mean: %s', self.mean),
                   ('  channel mean: %s', self.meanchannel))
        for (fmt, getter) in summary:
            log.info(fmt % str(getter()))
    
    def min(self):
        return self.minpixel()

    def minpixel(self):
        return np.min(self.load().array().flatten())
    
    def max(self):
        return self.maxpixel()

    def maxpixel(self):
        return np.max(self.load().array().flatten())
    
    def mean(self):
        """Mean over all pixels"""
        return np.mean(self.load().array().flatten())

    def meanchannel(self, k=None):
        """Mean per channel over all pixels.  If channel k is provided, return just the mean for that channel"""
        C = np.mean(self.load().array(), axis=(0, 1)).flatten()
        return C[k] if k is not None else C
    
    def sum(self):
        return np.sum(self.load().array().flatten())

    # Image visualization
    def closeall(self):
        """Close every open figure window and return this object"""
        closer = vipy.show.closeall
        closer()
        return self
    
    def close(self, fignum=None):
        """Close the requested figure number, or close all of fignum=None"""
        if fignum is None:
            return self.closeall()
        else:
            vipy.show.close(fignum)
            return self
    
    def show(self, figure=1, nowindow=False, timestamp=None, mutator=None, theme='dark'):
        """Display the image in the provided figure number and return this object.

        A clone, optionally passed through mutator, is converted to RGB for display so that this object is not modified.
        """
        assert self.load().isloaded(), 'Image not loaded'
        if theme == 'dark':
            (timestampfacecolor, timestampcolor) = ('black', 'white')
        else:
            (timestampfacecolor, timestampcolor) = ('white', 'black')
        im = mutator(self.clone()) if mutator else self.clone()
        vipy.show.imshow(im.rgb().numpy(), fignum=figure, nowindow=nowindow, timestamp=timestamp, timestampfacecolor=timestampfacecolor, flush=True, timestampcolor=timestampcolor)
        return self

    def save(self, filename=None, quality=75):
        """Save the current image to a new filename and return the image object.  Resets edit history"""
        return self.filename(self.saveas(filename if filename   is not None else tempjpg(), quality=quality)).loader(None).flush_array()
        
        
    # Image export
    def pkl(self, pklfile=None):
        """Save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains.

        If pklfile is not provided, it is the image filename with the extension replaced by '.pkl'.
        """
        assert pklfile is not None or self.filename() is not None
        if pklfile is None:
            pklfile = toextension(self.filename(), '.pkl')
        outdir = vipy.util.filepath(pklfile)
        remkdir(outdir)
        vipy.util.save(self, pklfile)
        return self

    def pklif(self, b, pklfile=None):
        """Save the object to the provided pickle file only if b=True. Useful for conditional intermediate saving in long fluent chains"""
        assert isinstance(b, bool)
        return self.pkl(pklfile) if b else self

    def saveas(self, filename=None, writeas=None, quality=75):
        """Save current buffer (not including drawing overlays) to new filename and return filename.  If filename is not provided, use a temporary JPEG filename.

        Greyscale images (colorspace 'grey', or its synonym 'gray') are written with the greyscale writer.
        Raises ValueError for 'float' images, which must be converted first.
        """
        filename = tempjpg() if filename is None else filename
        if self.colorspace() in ['gray', 'grey']:  # 'gray' is standardized to 'grey' internally, accept both spellings
            imwritegray(self.grayscale()._array, filename, quality=quality)
        elif self.colorspace() != 'float':
            imwrite(self.load().array(), filename, writeas=writeas, quality=quality)
        else:
            raise ValueError('Convert float image to RGB or gray first. Try self.mat2gray()')
        return filename

    def saveastmp(self):
        """Save current buffer to a temporary JPEG filename and return the filename.  Alias for savetmp()"""
        tmpfile = tempjpg()
        return self.saveas(tmpfile)

    def savetmp(self):
        """Save current buffer to temp JPEG filename and return filename.   Alias for saveastmp()"""
        return self.saveastmp()

    def tocache(self):
        """Save current buffer to a temporary JPEG filename in the VIPY cache and return the filename."""
        cachefile = vipy.util.tocache(tempjpg())
        return self.saveas(cachefile)
    
    def base64(self):
        """Export the base64 encoded bytes of a JPEG of this image, suitable for embedding in an html page.  A clone is converted to RGB for encoding"""
        stream = io.BytesIO()
        rgbimage = self.clone().rgb().pil()
        rgbimage.save(stream, format='JPEG')
        encoded = base64.b64encode(stream.getvalue())
        return encoded
        
    def ascii(self):
        """Export a base64 ascii encoding of the image suitable for embedding in an <img> tag"""
        return self.base64().decode('ascii')

    def html(self, alt=None, id=None, attributes={'loading':'lazy'}):
        """Export a base64 encoding of the image suitable for embedding in an html page, enclosed in <img> tag
           
           Returns:
              -string:  <img src="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" loading="lazy"> containing base64 encoded JPEG and alt text with lazy loading
        """
        assert isinstance(attributes, dict)
        b = self.base64().decode('ascii')
        alt_text = alt if alt is not None else self.filename()
        id = id if id is not None else self.filename()
        attr = ' '.join(['%s="%s"' % (str(k),str(v)) for (k,v) in attributes.items()])
        return '<img %ssrc="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" %s>' % (('id="%s" ' % id) if id is not None else '', b, str(alt_text), attr)

    def annotate(self, timestamp=None, mutator=None, theme='dark'):
        """Render the annotated figure for this image and return it as a new RGB image object"""
        # FIXME: for k in range(0,10): self.annotate().show(figure=k), this will result in cumulative figures
        rendered = self.savefig(timestamp=timestamp, theme=theme, mutator=mutator)
        pixels = rendered.rgb().array()
        return vipy.image.Image(array=pixels, colorspace='rgb')

    def savefig(self, filename=None, figure=1, timestamp=None, theme='dark', mutator=None):
        """Render self.show() with drawing overlays in the given figure and return the rendering as an RGBA image object.

        If filename is provided, the rendering is also saved (as RGB) to that filename.  The figure is closed afterwards.
        """
        self.show(figure=figure, nowindow=True, timestamp=timestamp, theme=theme, mutator=mutator)  # sets figure dimensions, does not display window
        canvas = plt.figure(figure).canvas  # read back the figure that was just drawn, not always figure 1
        (W,H) = canvas.get_width_height()  # fast
        buf = io.BytesIO()
        canvas.print_raw(buf)  # fast
        img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))  # RGBA
        vipy.show.close(figure)
        t = vipy.image.Image(array=img, colorspace='rgba')
        if filename is not None:
            t.rgb().saveas(os.path.abspath(os.path.expanduser(filename)))
        return t

    def map(self, func):
        """Replace the pixel buffer with func(self.array()) and return self.

        Args:
            func: a function (e.g. f = lambda img: 255.0-img) that accepts the numpy array of this image and returns a numpy array

        Returns:
            This object with array() set to the function output.  If the dtype or shape of the output differs from the input, the colorspace is set to 'float'.
        """
        assert isinstance(func, types.LambdaType), "Input must be lambda function (e.g. f = lambda img: 255.0-img)"
        before = self.array()  # reference, not a copy
        after = func(self.array())  # the function may operate in-place
        assert isnumpy(after), "Lambda function output must be numpy array"
        self.array(after)  # reference
        layout_changed = (after.dtype != before.dtype) or (after.shape != before.shape)
        if layout_changed:
            self.colorspace('float')  # unknown colorspace after transformation, set generic
        return self

    def perceptualhash(self, bits=128, asbinary=False, asbytes=False):
        """Perceptual differential hash function

        A clone of this image is resized to a small square, converted to greyscale, and the signs of the gradients along both image axes are packed into the hash.
        
        Args:
            bits: [int]  must be 2*k*k for k in [2,16].  Longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
            asbinary: [bool] If true, return a binary array
            asbytes: [bool] if true return a byte array, this takes precedence over asbinary

        Returns:
            A hex string encoding the perceptual hash such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
            asbytes: a bytes array
            asbinary: a numpy binary array            

        .. notes::
            - Can be used for near duplicate detection by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.  Equivalently, `vipy.image.Image.perceptualhash_distance`.
            - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex(h))
        """        
        allowablebits = [2*k*k for k in range(2, 17)]
        assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
        side = int(np.ceil(np.sqrt(bits/2.0)))
        grey = self.clone().resize(cols=side+1, rows=side+1).greyscale().numpy()
        # Two gradient planes, cropped to (side x side), yields 2*side*side = bits sign bits
        signs = (np.dstack(np.gradient(grey))[0:-1, 0:-1] > 0).flatten()
        if asbytes:
            return bytes(np.packbits(signs))
        if asbinary:
            return signs
        return bytes(np.packbits(signs)).hex()

    @staticmethod
    def perceptualhash_distance(h1, h2):
        """Hamming distance between two perceptual hashes"""
        assert len(h1) == len(h2)
        return np.sum(np.unpackbits(bytearray().fromhex(h1)) != np.unpackbits(bytearray().fromhex(h2)))
    

    def rot90cw(self):
        """Rotate the pixel buffer 90 degrees clockwise (three counterclockwise quarter turns) and return self"""
        rotated = np.rot90(self.numpy(), k=3)
        self.array(rotated)
        return self

    def rot90ccw(self):
        """Rotate the pixel buffer 90 degrees counterclockwise (one quarter turn) and return self"""
        rotated = np.rot90(self.numpy(), k=1)
        self.array(rotated)
        return self

    def face_detection(self, mindim=256,  conf=0.2):
        """Detect faces in the scene, add as objects, return new scene with just faces
        
        Args:
            mindim [int]: The minimum dimension for downsampling the image for face detection.  Will be upsampled back to native resolution prior to return
            conf [float]: Accepted for signature parity with person_detection, but not forwarded to the face detector by this method

        Returns
            The flushed output of the heyvi face detector applied to a cleared `vipy.image.Scene` cast of a clone of this image

        .. note:: This method uses a CPU-only pretrained face detector.  This is convenient, but slow.  See the heyvi package for optimized GPU batch processing for faster operation.
        """
        try_import('heyvi')
        import heyvi
        assert heyvi.version.is_at_least('0.3.28')
        detector = heyvi.detection.FaceDetector()
        scene = Scene.cast(self.clone()).clear().mindim(mindim)  # existing objects are cleared before detection
        return detector(scene).flush()
    
    def person_detection(self, mindim=256, conf=0.2):
        """Detect only people in the scene, add as objects, return new scene with just people

        Args:
            mindim [int]: The minimum dimension for downsampling the image for person detection.  Will be upsampled back to native resolution prior to return
            conf [float]: A real value between [0,1] of the minimum confidence for person detection, forwarded to the detector

        Returns
            The flushed output of the heyvi object detector, restricted to objects=['person'], applied to a cleared `vipy.image.Scene` cast of a clone of this image
        
        .. note:: This method uses a CPU-only pretrained person detector.  This is convenient, but slow.  See the heyvi package for optimized GPU batch processing for faster operation.
        """
        try_import('heyvi')
        import heyvi
        assert heyvi.version.is_at_least('0.3.28')
        detector = heyvi.detection.ObjectDetector()
        scene = Scene.cast(self.clone()).clear().mindim(mindim)  # existing objects are cleared before detection
        return detector(scene, conf=conf, objects=['person']).flush()

    def face_blur(self, radius=4, mindim=256):
        """Replace pixels for all detected faces with `vipy.image.Scene.blurmask`, add locations of detected faces into attributes.

        Args:
            radius [int]: The radius of pixels for `vipy.image.Scene.blurmask`
            mindim [int]: The minimum dimension for downsampling the image for face detection, forwarded to face_detection
        
        Returns:
            The downcast result of blurmask() on the face detection output, with the 'face_blur' attribute set to the list of integer face boxes (as un-encoded json dictionaries) that were blurred.

        .. notes::
            - This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.  
            - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.  
            - To retain boxes, use self.face_detection().blurmask()
        """
        faces = self.face_detection(mindim=mindim)  # only faces
        boxes = [o.int().json(encode=False) for o in faces.objects()]
        return faces.setattribute('face_blur', boxes).blurmask(radius=radius).downcast()

    def face_pixelize(self, radius=7, mindim=256):
        """Replace pixels for all detected faces with `vipy.image.Scene.pixelize`, add locations of detected faces into attributes.

        Args:
            radius [int]: The radius of pixels for `vipy.image.Scene.pixelize`
            mindim [int]: The minimum dimension for downsampling the image for face detection, forwarded to face_detection
        
        Returns:
            The downcast result of pixelize() on the face detection output, with the 'face_pixelize' attribute set to the list of integer face boxes (as un-encoded json dictionaries) that were pixelized.

        .. notes::
            - This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.  
            - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.  
            - To retain boxes, use self.face_detection().pixelize()
        """
        faces = self.face_detection(mindim=mindim)
        boxes = [o.int().json(encode=False) for o in faces.objects()]
        return faces.setattribute('face_pixelize', boxes).pixelize(radius=radius).downcast()


    def viewport(self):
        """Return the bounding box of the current loaded pixels in the original filename/url/buffer.

        Starting from self.imagebox(), every (method name, kwargs) entry recorded in self._history() is applied to the box in reverse order.
        This reverses the chain of geometric transformations applied to the original image to recover the bounding box of the pixels in array().

        This is useful to specify a region of a larger image that was zoomed in for processing.
        
        To show this viewport as a bounding box:

        >>> im = vipy.image.vehicles().centercrop(100,100)
        >>> viewport = vipy.object.Detection.cast(im.viewport())
        >>> im.flush().append(viewport).show()
        """
        box = self.imagebox()
        if self._history() is None:
            return box  # nothing recorded, the viewport is the full image box
        for (method, kwargs) in reversed(self._history()):
            getattr(box, method)(**kwargs)
        return box

    def padcrop(self, bbox):
        """Crop the image buffer using the supplied bounding box object, zero padding if box is outside image rectangle, update all scene objects

        The image is zero padded by the largest overhang of the box in each dimension, then cropped with the box translated by that same padding.
        """
        overhang_x = max(0-bbox.xmin(), bbox.xmax()-self.width())
        dx = int(max(0, overhang_x))
        overhang_y = max(0-bbox.ymin(), bbox.ymax()-self.height())
        dy = int(max(0, overhang_y))
        padded = self.zeropad(dx,dy)
        shifted = bbox.translate(dx=dx, dy=dy)  # box in the coordinates of the padded image
        return padded._crop(shifted)
    
    def recenter(self, p):
        """Recenter the image so that point p=(x=col, y=row) in the current image is in the middle of the new image, zeropad to (width, height).  
           This is useful to implement a 'saccade', under the small angle assumption, where a rotation is approximated by a translation

           Implemented as a padcrop() of the image box with its centroid set to p.
        """        
        target = self.imagebox().centroid(p)
        return self.padcrop(target)

Subclasses

Labeled

Static methods

def PIL_loader(x)

Load from a PIL image file object

Expand source code Browse git

@staticmethod    
def PIL_loader(x):
    """Load from a PIL image file object"""
    return np.array(x)

def bytes_array_loader(x)

Load from a bytes array

Expand source code Browse git

@staticmethod
def bytes_array_loader(x):
    """Load from a bytes array: decode the encoded image bytes with PIL and return the pixels as a numpy array"""
    stream = io.BytesIO(x)
    return np.array(PIL.Image.open(stream))

def cast(im)

Typecast the conformal vipy.image object im as Image.

This is useful for downcasting Scene or ImageDetection down to an image.

ims = vipy.image.RandomScene()
im = vipy.image.Image.cast(ims)

def from_dict(d)

def from_json(s)

Import the JSON string s as an Image object.

Args

s: json encoded string

This will perform a round trip such that im1 == im2

im1 = vipy.image.RandomImage()
im2 = vipy.image.Image.from_json(im1.json())
assert im1 == im2

Note: to construct from non-encoded json (e.g. a dict prior to dumps), use from_dict

def from_torch(x, order='CHW')

Convert a 1xCxHxW, CxHxW or NxCxHxW torch tensor (or numpy array with torch channel order) to HxWxC numpy array, returns new Image with inferred colorspace corresponding to data type in x

Expand source code Browse git

@staticmethod
def from_torch(x, order='CHW'):
    """Convert a 1xCxHxW, CxHxW or NxCxHxW torch tensor (or numpy array with torch channel order) to HxWxC numpy array, returns new `vipy.image.Image` with inferred colorspace corresponding to data type in x

    Args:
        x: a torch.Tensor or numpy array with 3 or 4 dimensions.  A leading singleton batch dimension is squeezed.
        order: [str] the axis order of x, one of 'CHW', 'WHC', 'HWC' or 'NCHW'.  'NCHW' requires a 4D input (after squeezing) and produces an HxWxCxN array.

    Returns:
        A new `vipy.image.Image` wrapping a copy of the permuted data.  Tensors are converted to float32, so the colorspace is 'float'.  Numpy arrays keep their dtype, with colorspace 'rgb' for three channel uint8, 'lum' for single channel uint8, 'float' for float32 and None otherwise.

    Raises:
        ValueError: if order is not one of the supported axis orders
    """
    from torch import Tensor, is_tensor;  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow) 
    assert isinstance(x, Tensor) or isinstance(x, np.ndarray), "Invalid input type '%s'- must be torch.Tensor" % (str(type(x)))
    assert x.ndim == 4 or x.ndim == 3, "Torch tensor must be shape 1xCxHxW, CxHxW, or NxCxHxW"
    x = x.squeeze(0) if (x.ndim == 4 and x.shape[0] == 1) else x

    # Axis permutation taking each supported layout to HxWxC (HxWxCxN for batches), None is the identity
    permutations = {'CHW': (1,2,0), 'WHC': (1,0,2), 'HWC': None, 'NCHW': (2,3,1,0)}
    if order not in permutations:
        raise ValueError('unknown axis order "%s"' % order)
    if order == 'NCHW':
        assert x.ndim == 4, "invalid shape"
    axes = permutations[order]

    if is_tensor(x):
        x = (x if axes is None else x.permute(*axes)).cpu().detach().float().numpy()  # copied
    else:
        # numpy inputs take the same permutation for every order, including 'NCHW'
        x = np.copy(x) if axes is None else np.copy(x).transpose(*axes)  # copied

    img = x
    colorspace = 'float' if img.dtype == np.float32 else None
    colorspace = 'rgb' if img.dtype == np.uint8 and img.shape[2] == 3 else colorspace  # assumed
    colorspace = 'lum' if img.dtype == np.uint8 and img.shape[2] == 1 else colorspace
    return Image(array=img, colorspace=colorspace)

def from_uri(uri)

Create an image object from an absolute file path or url

def fromtorch(x, order='CHW')

Alias for Image.from_torch()

Expand source code Browse git

@staticmethod
def fromtorch(x, order='CHW'):
    """Alias for `vipy.image.Image.from_torch`, the arguments are forwarded unchanged"""
    return Image.from_torch(x, order=order)

def perceptualhash_distance(h1, h2)

Hamming distance between two perceptual hashes

Expand source code Browse git

@staticmethod
def perceptualhash_distance(h1, h2):
    """Hamming distance between two perceptual hashes"""
    assert len(h1) == len(h2)
    return np.sum(np.unpackbits(bytearray().fromhex(h1)) != np.unpackbits(bytearray().fromhex(h2)))

def untile(imlist)

Undo an image tiling and recreate the original image.

tiles = im.tile(im.width()//2, im.height()//2, 0, 0)
imdst = vipy.image.Image.untile(tiles)
assert imdst == im

Args

imlist: this must be the output of Image.tile()

Returns

A new Image object reconstructed from the tiling, such that this is equivalent to the input to vipy.image.Image.tile

Note: All annotations are updated properly for each tile, when the source image is Scene

Instance variables

var attributes

Expand source code Browse git

class Image():
    """vipy.image.Image class
    
    The vipy image class provides a fluent, lazy interface for representing, transforming and visualizing images.
    The following constructors are supported:

    ```python
    im = vipy.image.Image(filename="/path/to/image.ext")
    ```
    
    All image file formats that are readable by PIL are supported here.

    ```python
    im = vipy.image.Image(url="http://domain.com/path/to/image.ext")
    ```
    
    The image will be downloaded from the provided url and saved to a temporary filename.
    The environment variable VIPY_CACHE controls the location of the directory used for saving images, otherwise this will be saved to the system temp directory.

    ```python
    im = vipy.image.Image(url="http://domain.com/path/to/image.ext", filename="/path/to/new/image.ext")
    ```

    The image will be downloaded from the provided url and saved to the provided filename.
    The url() method provides optional basic authentication set for username and password

    ```python
    im = vipy.image.Image(array=img, colorspace='rgb')
    ```

    The image will be constructed from a provided numpy array 'img', with an associated colorspace.  The numpy array and colorspace can be one of the following combinations:

    - 'rgb': uint8, three channel (red, green, blue)
    - 'rgba':  uint8, four channel (rgb + alpha)
    - 'bgr': uint8, three channel (blue, green, red), such as is returned from cv2.imread()
    - 'bgra':  uint8, four channel
    - 'hsv':  uint8, three channel (hue, saturation, value)
    - 'lum;:  uint8, one channel, luminance (8 bit grey level)
    - 'grey':  float32, one channel in range [0,1] (32 bit intensity)
    - 'float':  float32, any channel in range [-inf, +inf]
    
    The most general colorspace is 'float' which is used to manipulate images prior to network encoding, such as applying bias. 
    
    Args:
        filename: a path to an image file that is readable by PIL
        url:  a url string to an image file that is readable by PIL
        array: a numpy array of type uint8 or float32 of shape HxWxC=height x width x channels
        colorspace:  a string in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']
        attributes:  a python dictionary that is passed by reference to the image.  This is useful for encoding metadata about the image.  Accessible as im.attributes

    Returns:
        A `vipy.image.Image` object

    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')
    def __init__(self, filename=None, url=None, array=None, colorspace=None, attributes=None):
        """Initialize from any combination of filename, url and numpy array, guessing the colorspace from the array when it is not provided"""
        # Private attributes
        self._loader = None     # function to load an image, set with loader() method
        self._array = None
        self._colorspace = None

        # Initialization
        self._filename = filename
        if url is not None:
            assert isinstance(url, str) and url.startswith(('http://', 'https://', 'scp://', 's3://'))  # faster than vipy.util.isurl()
        self._url = url
        if array is not None:
            assert isnumpy(array), 'Invalid Array - Type "%s" must be np.array()' % (str(type(array)))
        self.array(array)  # shallow copy

        # Colorspace guesses, only when a colorspace is not provided and pixels are available
        if not colorspace and self.isloaded():
            img = self._array
            is_uint8 = (img.dtype == np.uint8)
            if is_uint8 and img.ndim == 3 and img.shape[2] == 3:
                colorspace = 'rgb'  # three channel uint8
            elif is_uint8 and (img.ndim == 2 or (img.ndim == 3 and img.shape[2] == 1)):
                colorspace = 'lum'  # single channel uint8
            elif img.dtype == np.float32:
                colorspace = 'float'  # float32 with any number of channels
        self.colorspace(colorspace)

        # Public attributes: passed in as a dictionary, stored by reference
        self.attributes = {}
        if attributes is not None:
            assert isinstance(attributes, dict), "Attributes must be dictionary"
            self.attributes = attributes

    @classmethod
    def cast(cls, im):
        """Typecast the conformal vipy.image object im as `vipy.image.Image`.
        
        This is useful for downcasting `vipy.image.Scene` or `vipy.image.ImageDetection` down to an image.

        ```python
        ims = vipy.image.RandomScene()
        im = vipy.image.Image.cast(ims)
        ```

        The new object is constructed from the filename, url, array, colorspace and attributes of im, which are passed by reference.

        """
        assert isinstance(im, vipy.image.Image), "Invalid input - must derive from vipy.image.Image"
        fields = {'filename': im._filename,
                  'url': im._url,
                  'array': im._array,
                  'colorspace': im._colorspace,
                  'attributes': im.attributes}
        return cls(**fields)


    @classmethod
    def from_dict(cls, d):
        d = {k.lstrip('_'):v for (k,v) in d.items()}  # prettyjson (remove "_" prefix to attributes)                
        return cls(filename=d['filename'] if 'filename' in d else None,
                   url=d['url'] if 'url' in d else None,
                   array=np.array(d['array'], dtype=np.uint8) if 'array' in d and d['array'] is not None else None,
                   colorspace=d['colorspace'] if 'colorspace' in d else None,
                   attributes=d['attributes'] if 'attributes' in d else None)
        
    @classmethod
    def from_uri(cls, uri):
        """Create an image object from an absolute file path or url.

        The uri is stored as the url if vipy.util.isurl(uri) is true, and as the filename if vipy.util.isfile(uri) is true.
        """
        assert vipy.util.isurl(uri) or vipy.util.isfile(uri), "invalid path"
        url = uri if vipy.util.isurl(uri) else None
        filename = uri if vipy.util.isfile(uri) else None
        return cls(url=url, filename=filename)
    
    @classmethod
    def from_json(cls, s):
        """Import the JSON string s as an `vipy.image.Image` object.

        Args:
            s: json encoded string
        
        This will perform a round trip such that im1 == im2

        ```python
        im1 = vupy.image.RandomImage()
        im2 = vipy.image.Image.from_json(im1.json())
        assert im1 == im2
        ```

        Note: to construct from non-encoded json (e.g. a dict prior to dumps), use from_dict
        
        """
        return cls.from_dict(json.loads(s) if not isinstance(s, dict) else s)
    
    def __eq__(self, other):
        """Images are equivalent if they have the same filename, url and array"""
        return isinstance(other, Image) and other.filename()==self.filename() and other.url()==self.url() and np.all(other.array() == self.array())

    def __str__(self):
        return self.__repr__()

    def __iter__(self):
        """Yield single image for consistency with videos"""
        yield self

    def __len__(self):
        """Images have length 1 always"""
        return 1
    
    def __array__(self):
        """Called on np.array(self) for custom array container, (requires numpy >=1.16)"""
        return self.numpy()
    
    def __repr__(self):
        strlist = []
        if self.isloaded():
            strlist.append("height=%d, width=%d, color=%s" % (self._array.shape[0], self._array.shape[1], self.colorspace()))
        elif self.has_loader():
            strlist.append('loaded=False')
        if self.colorspace() == 'float':
            strlist.append('channels=%d' % self.channels())
        if self.filename() is not None:
            strlist.append('filename=%s' % self.filename())
        if self.hasurl():
            strlist.append('url=%s' % self.url())
        return str('<vipy.image.Image: %s>' % (', '.join(strlist)))

    def sanitize(self):
        """Remove all private keys from the attributes dictionary.
        
        The attributes dictionary is useful storage for arbitrary (key,value) pairs.  However, this storage may contain sensitive information that should be scrubbed from the media before serialization.  As a general rule, any key that is of the form '__keyname' prepended by two underscores is a private key.  This is analogous to private or reserved attributes in the python language.  Users should reserve these keynames for those keys that should be sanitized and removed before any serialization of this object.
        
        ```python
        assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False
        ```

        The attributes are replaced with a new filtered dictionary.  Attributes that are not a dictionary are left untouched.

        """
        if isinstance(self.attributes, dict):
            public = {}
            for (key, value) in self.attributes.items():
                if not key.startswith('__'):
                    public[key] = value
            self.attributes = public
        return self
    
    def print(self, prefix='', sleep=None):
        """Print the representation of the image and return self with an optional sleep=n seconds
        
        Useful for debugging or sequential visualization in long fluent chains.

        Args:
            prefix: [str] a string printed immediately before the representation
            sleep: [number] if not None, a non-negative number of seconds to sleep after printing.  Zero is allowed.

        Returns:
            This object, unmodified
        """
        print(prefix+self.__repr__())
        if sleep is not None:
            # The condition now agrees with the message: zero is a valid (non-negative) duration
            assert sleep >= 0, "Sleep must be a non-negative number of seconds"
            time.sleep(sleep)
        return self

    def exif(self, extended=False):
        """Return the EXIF meta-data in filename as a dictionary.  Included non-base EXIF data if extended=True.  Returns empty dictionary if no EXIF exists.  Triggers download but not load.

        Args:
            extended: [bool] if true, also export the tags of every IFD in PIL.ExifTags.IFD.  Tags of the GPSInfo IFD are named using PIL.ExifTags.GPSTAGS, all others using PIL.ExifTags.TAGS.  Tags without a known name are keyed by their numeric id.

        Returns:
            A dictionary mapping tag name to value, empty if there is no filename after download
        """

        d = {}
        if not self.download().hasfilename():
            return d

        # Context manager closes the file handle, all EXIF access happens while the file is open
        with PIL.Image.open(self.filename()) as img:
            exif = img.getexif()
            if exif is not None:
                d = {PIL.ExifTags.TAGS[k]:v for (k,v) in exif.items() if k in PIL.ExifTags.TAGS}

            if extended and exif is not None:
                for ifd_id in PIL.ExifTags.IFD:
                    try:
                        ifd = exif.get_ifd(ifd_id)
                        if ifd_id == PIL.ExifTags.IFD.GPSInfo:
                            resolve = PIL.ExifTags.GPSTAGS
                        else:
                            resolve = PIL.ExifTags.TAGS

                        # Export the tags for every IFD, including GPSInfo
                        for (k, v) in ifd.items():
                            d[resolve.get(k, k)] = v
                    except KeyError:
                        pass  # best effort, skip an IFD that cannot be resolved
        return d
    
    def tile(self, tilewidth, tileheight, overlaprows=0, overlapcols=0):
        """Generate an image tiling.
        
        A tiling is a decomposition of an image into overlapping or non-overlapping rectangular regions.  

        Args:
            tilewidth: [int] the image width of each tile
            tileheight: [int] the image height of each tile
            overlaprows: [int] the number of overlapping rows (height) for each tile
            overlapcols: [int] the number of overlapping columns (width) for each tile
    
        Returns:
            A list of `vipy.image.Image` objects such that each image is a single tile and the set of these tiles forms the original image
            Each image in the returned list contains the 'tile' attribute which encodes the crop used to create the tile and the shape of the source image.

        .. note:: 
            - `vipy.image.Image.tile` can be undone using `vipy.image.Image.untile`
            - The identity tiling is im.tile(im.width(), im.height(), overlaprows=0, overlapcols=0)
            - Tile boxes on the right and bottom border are clipped to the remaining image width and height
            - All annotations are updated properly for each tile, when the source image is `vipy.image.Scene`
        """
        assert tilewidth > 0 and tileheight > 0 and overlaprows >= 0 and overlapcols >= 0, "Invalid input"
        assert self.width() >= tilewidth-overlapcols and self.height() >= tileheight-overlaprows, "Invalid input" 
        (stride_x, stride_y) = (tilewidth-overlapcols, tileheight-overlaprows)

        # Column major enumeration of tile boxes: x offsets in the outer loop, y offsets in the inner loop
        bboxes = []
        for i in range(0, self.width()-overlapcols, stride_x):
            for j in range(0, self.height()-overlaprows, stride_y):
                bboxes.append(BoundingBox(xmin=i, ymin=j, width=min(tilewidth, self.width()-i), height=min(tileheight, self.height()-j)))

        tiles = []
        for bb in bboxes:
            tagged = self.clone(shallow=True, attributes=True).setattribute('tile', {'crop':bb, 'shape':self.shape()})
            tiles.append(tagged.crop(bb))
        return tiles

    def union(self, other):
        """No-op for `vipy.image.Image`, the other image is ignored and self is returned unchanged"""
        return self
    
    @classmethod
    def untile(cls, imlist):
        """Undo an image tiling and recreate the original image.

        ```python
        tiles = im.tile(im.width()//2, im.height()//2, 0, 0)
        imdst = vipy.image.Image.untile(tiles)
        assert imdst == im
        ```

        Args:
            imlist: this must be the output of `vipy.image.Image.tile`
        
        Returns:
            A  new `vipy.image.Image` object reconstructed from the tiling, such that this is equivalent to the input to `vipy.image.Image.tile`.  Returns None if imlist is empty.
        
        .. note:: 
            - All annotations are updated properly for each tile, when the source image is `vipy.image.Scene`
            - The tiles in imlist are modified: the array of each tile is replaced by its crop to the tile box, and objects (if any) are shifted to the tile origin
            - The reconstruction canvas is allocated as uint8
        """
        assert all([isinstance(im, vipy.image.Image) and im.hasattribute('tile') for im in imlist]), "invalid image tile list"        
        imc = None
        for im in imlist:
            if imc is None:
                # Allocate the zero canvas once, from the source shape recorded in the first tile
                imc = im.clone(shallow=True).array(np.zeros( (im.attributes['tile']['shape'][0], im.attributes['tile']['shape'][1], im.channels()), dtype=np.uint8))                
            # Crop the tile pixels to the size of the tile box (moved to the origin), then paste them at the tile box location
            imc = imc.splat(im.array(im.attributes['tile']['crop'].clone().to_origin().int().crop(im.array())), im.attributes['tile']['crop'])
            if hasattr(im, 'objectmap'):
                im.objectmap(lambda o: o.set_origin(im.attributes['tile']['crop']))  # FIXME: only for Scene()
            imc = imc.union(im)  # merge the tile into the canvas (a no-op for a plain Image)
        return imc
    
    def uncrop(self, bb, shape):
        """Uncrop using provided bounding box and zeropad to shape=(Height, Width).

        An uncrop is the inverse operation for a crop, which preserves the cropped portion of the image in the correct location and replaces the rest with zeros out to shape.
    
        ```python
        im = vipy.image.RandomImage(128, 128)
        bb = vipy.geometry.BoundingBox(xmin=0, ymin=0, width=64, height=64)
        uncrop = im.crop(bb).uncrop(bb, shape=(128,128))
        ```

        Args:
            bb: [`vipy.geometry.BoundingBox`] the bounding box used to crop the image in self
            shape: [tuple] (height, width) of the uncropped image
    
        Returns:
            this `vipy.image.Image` object with the pixels uncropped.

        .. note:: NOT idempotent.  This will generate different results if run more than once.
        """
        (x, y, w, h) = bb.xywh()
        (H, W) = shape
        pad_rows = (int(y), int(H-(y+h)))  # (before, after) padding along the height
        pad_cols = (int(x), int(W-(x+w)))  # (before, after) padding along the width
        img = self.load().array()
        padding = (pad_rows, pad_cols, (0, 0)) if img.ndim == 3 else (pad_rows, pad_cols)  # never pad channels
        self._array = np.pad(img, padding, mode='constant')
        return self

    def splat(self, im, bb):
        """Replace pixels within boundingbox in self with pixels in im.

        If im has the same width and height as bb, all of im is pasted at bb.  Otherwise the region of im at bb is pasted at bb, which requires bb to be interior to both images.
        """
        assert isinstance(im, vipy.image.Image), "invalid image"
        same_size = (im.width() == bb.width() and im.height() == bb.height())
        assert same_size or (bb.isinterior(im.width(), im.height()) and bb.isinterior(self.width(), self.height())), "Invalid bounding box '%s'" % str(bb)
        (x,y,w,h) = bb.xywh()
        rows = slice(int(y), int(y+h))
        cols = slice(int(x), int(x+w))
        self._array[rows, cols] = im.array() if same_size else im.array()[rows, cols]
        return self            
        
    def store(self):
        """Store the current image file as an attribute of this object.  Useful for archiving an object to be fully self contained without any external references.  
        
           -The encoded bytes of the file are saved in the '__image__' attribute
           -Remove this stored image using unstore()
           -Unpack this stored image and set up the filename using restore() 
           -This method is more efficient than load() followed by pkl(), as it stores the encoded image as a byte string.
           -Useful for creating a single self contained object for distributed processing.  

        ```python
        v == v.store().restore(v.filename()) 
        ```

        """
        assert self.hasfilename(), "Image file not found"
        with open(self.filename(), 'rb') as handle:
            encoded = handle.read()
        self.attributes['__image__'] = encoded
        return self

    def unstore(self):
        """Delete the currently stored image from store(), by deleting the '__image__' attribute"""
        key = '__image__'
        return self.delattribute(key)

    def restore(self, filename):
        """Save the currently stored image to filename, and set up filename.

        The bytes in the '__image__' attribute (see store) are written to filename, then filename is set as the filename of this object.
        """
        assert self.hasattribute('__image__'), "Image not stored"
        encoded = self.attributes['__image__']
        with open(filename, 'wb') as handle:
            handle.write(encoded)
        return self.filename(filename)                
    
    def abspath(self):
        """Change the path of the filename from a relative path to an absolute path (not relocatable).

        The user home directory is expanded and the resulting absolute path is normalized.
        """
        expanded = os.path.expanduser(self.filename())
        absolute = os.path.normpath(os.path.abspath(expanded))
        return self.filename(absolute)

    def relpath(self, parent=None):
        """Replace the filename with a relative path to parent (or current working directory if none)

        Args:
            parent: [str] the directory that the new filename is relative to, this must be a leading path of the (user expanded) filename

        Returns:
            This object with the filename replaced by the relative path string

        Raises:
            ValueError: (from PurePath.relative_to) if the filename is not located under parent
        """
        from pathlib import PurePath  # local import, PurePath is not among the top-level imports of this module

        parent = parent if parent is not None else os.getcwd()
        expanded = os.path.expanduser(self.filename())
        assert parent in expanded, "Parent path '%s' not found in abspath '%s'" % (parent, self.filename())
        # Store a plain string, consistent with abspath() and with JSON serialization of the filename
        return self.filename(str(PurePath(expanded).relative_to(parent)))

    def canload(self):
        """Return True if the image can be loaded successfully, useful for filtering bad links or corrupt images

        An image that is already loaded returns True.  Otherwise, an existing image file is verified with PIL without decoding the pixels, and anything else falls back on a full load followed by a flush.

        Returns:
            True if the verification or load completed without raising an exception, False otherwise
        """
        if self.isloaded():
            return True
        try:
            if isimagefile(self._filename) and os.path.exists(self._filename):
                # faster, throws exception on corrupted image.  The context manager releases the file handle.
                with PIL.Image.open(self._filename) as img:
                    img.verify()
            else:
                self.load().flush()  # fallback, load it and flush to avoid memory leak (expensive)
            return True
        except Exception:
            # Any load failure means "cannot load".  KeyboardInterrupt and SystemExit are not swallowed.
            return False
        
    def dict(self):
        """Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding"""
        return {k.lstrip('_'):getattr(self, k) for k in Image.__slots__}  # prettyjson (remove "_" prefix to attributes)                                    

    def json(self, encode=True):
        """Serialize this image to JSON.

        Args:
            encode: [bool] if true return a JSON encoded string, otherwise return the dictionary prior to encoding

        Returns:
            The non-None entries of self.dict(), with the pixel array (if present) converted to nested lists

        Raises:
            ValueError: if the attributes dictionary is not JSON serializable
        """
        if not vipy.util.is_jsonable(self.attributes):
            raise ValueError('attributes dictionary contains non-json elements and cannot be serialized.  Try self.clear_attributes() or self.sanitize()')        
        payload = {k:v for (k,v) in self.dict().items() if v is not None}  # filter empty
        if payload.get('array') is not None:
            if self.hasfilename() or self.hasurl():
                log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() first, then reload the image from backing filename/url after json import')
            payload['array'] = self._array.tolist()
        if encode:
            return json.dumps(payload)
        return payload
        
    def loader(self, f, x=None):
        """Lambda function to load an unsupported image filename to a numpy array.
        
        This lambda function will be executed during load and the result will be stored in self._array
        """
        self._loader = (f, x if x is not None else self.filename()) if f is not None else None
        return self

    @staticmethod
    def bytes_array_loader(x):
        """Decode an encoded image held in a bytes object to a numpy array using PIL"""
        stream = io.BytesIO(x)
        decoded = PIL.Image.open(stream)
        return np.array(decoded)
    
    @staticmethod    
    def PIL_loader(x):
        """Load from a PIL image file object"""
        return np.array(x)

    def has_loader(self):
        return self._loader is not None

    
    def load(self, verbose=False):
        """Load image to cached private '_array' attribute.

        The pixel source is selected in this order: an already loaded buffer, a custom loader set by `vipy.image.Image.loader`, 
        a filename accepted by isimagefile() (read with PIL), a webp filename, a filename without an extension (read with PIL), 
        or a zero buffer with the shape stored in the '__shape' attribute.

        Args:
            verbose: [bool] If true, show additional useful printed output

        Returns:
            This `vipy.image.Image` object with the pixels loaded in self._array as a numpy array.
            For a webp filename, the result of `vipy.video.Video(filename).load()` is returned instead of this object.

        Raises:
            ValueError: If the filename has an extension that is not handled and no custom loader is set, or if no pixel source is defined.
            Any exception raised while downloading or reading is re-raised after resetting self._array to None (KeyboardInterrupt is re-raised without this reset).

        .. note:: This loader supports any image file format supported by PIL.  A custom loader can be added using `vipy.image.Image.loader`.
        """
        try:
            # Return if previously loaded image
            if self._array is not None:
                return self

            # Download URL to filename, only if the filename does not already exist
            if self._url is not None and not self.hasfilename():
                self.download(verbose=verbose)

            # Load filename to numpy array
            if self._loader is not None:
                # Custom loader: call f(x), then infer the colorspace from the channels and datatype of the result
                (f,x) = self._loader
                self._array = f(x)
                if self.isluminance():
                    self.colorspace('lum')
                elif self.iscolor():
                    self.colorspace('rgb')
                else:
                    # Anything else is coerced to an unconstrained float32 image
                    self._array = np.float32(self._array)
                    self.colorspace('float')

            elif isimagefile(self._filename):
                self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
                # The order of these tests matters: iscolor() is also true for four channel images
                if self.istransparent():
                    self.colorspace('rgba')  # must be before iscolor()
                elif self.iscolor():
                    self.colorspace('rgb')
                elif self.isgrey():
                    self.colorspace('grey')
                elif self.isluminance():
                    self.colorspace('lum')
                else:
                    log.warning('unknown colorspace for image "%s" - attempting to coerce to colorspace=float' % str(self._filename))
                    self._array = np.float32(self._array)
                    self.colorspace('float')
            elif iswebp(self._filename):
                # NOTE: this branch returns the loaded video object, not this image object
                import vipy.video
                return vipy.video.Video(self._filename).load()
            elif self.hasfilename() and hasextension(self._filename):
                raise ValueError('Non-standard image extensions require a custom loader')
            elif self.hasfilename():
                # Attempting to open it anyway, may be an image file without an extension. Cross your fingers ...
                # NOTE: the colorspace is not set in this branch
                self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
            elif not self.hasfilename() and self.hasattribute('__shape'):
                # Loading a previously flushed buffer, load zeros so that we can display superclass objects
                self._array = np.zeros( self.getattribute('__shape') )
                self.delattribute('__shape')
            else:
                raise ValueError('image file not defined')
            
        except IOError:
            # Reset the buffer so that a failed load is never mistaken for a loaded image, then propagate
            if verbose is True:
                log.error('IO error loading "%s" ' % self.filename())
            self._array = None
            raise

        except KeyboardInterrupt:
            raise

        except Exception:
            # Same handling as IOError for every other failure
            if verbose is True:
                log.error('Load error for image "%s"' % self.filename())
            self._array = None
            raise

        return self

    def download(self, timeout=10, verbose=False, cached=False):
        """Download URL to filename provided by constructor, or to temp filename.

        Args:
            timeout: [int]  The timeout in seconds for an http or https connection attempt.  See also [urllib.request.urlopen](https://docs.python.org/3/library/urllib.request.html).
            verbose: [bool] If true, output more helpful message.
            cached: [bool] If true, use the cached previously downloaded file (if it exists)

        Returns:
            This `vipy.image.Image` object with the URL downloaded to `vipy.image.Image.filename` or to a `vipy.util.tempimage` filename which can be retrieved with `vipy.image.Image.filename`.
        """
        if self._url is None and self._filename is not None:
            return self
        if self._url is None or not isurl(str(self._url)):
            raise ValueError('[vipy.image.download][ERROR]: '
                             'Invalid URL "%s" ' % self._url)

        if self._filename is None:
            if vipy.globals.cache() is not None:
                # There is a potential race condition here when downloading files with common names like "main.jpg", add a (repeatable, hashed) 3 character subdir (<=4096 subdirs for ext3, max ~32K)
                self._filename = os.path.join(remkdir(vipy.globals.cache()), stringhash(self._url, 3), filetail(self._url.split('?')[0]))  # preserve image filename from url
                self._filename = self._filename+'.jpg' if not has_image_extension(self._filename) else self._filename  # guess JPG for URLs with no file extension (e.g. php)
            elif isimageurl(self._url):
                self._filename = tempimage(fileext(self._url))
            else:
                self._filename = tempjpg()  # guess JPG for URLs with no file extension

        if cached and self.hasfilename():
            return self
            
        try:
            url_scheme = urllib.parse.urlparse(self._url)[0]
            if url_scheme in ['http', 'https']:
                vipy.downloader.download(self._url,
                                         self._filename,
                                         verbose=verbose,
                                         progress=False,
                                         timeout=timeout,
                                         sha1=self.getattribute('url_sha1'),
                                         username=self.getattribute('url_username'),
                                         password=self.getattribute('url_password'))
            elif url_scheme == 'file':
                shutil.copyfile(self._url, self._filename)
            elif url_scheme == 's3':
                raise NotImplementedError('see vipy.downloader.s3()')                
            else:
                raise NotImplementedError(
                    'Invalid URL scheme "%s" for URL "%s"' %
                    (url_scheme, self._url))

        except (httplib.BadStatusLine,
                urllib.error.URLError,
                urllib.error.HTTPError):
            if verbose is True:
                log.error('download failed for url "%s"' % self._url)
            self._array = None
            raise

        except IOError:
            if verbose:
                log.error('IO error downloading "%s" -> "%s" ' % (self.url(), self.filename()))
            self._array = None
            raise

        except KeyboardInterrupt:
            raise

        except Exception:
            if verbose:
                log.error('load error for image "%s"' % self.filename())
            self._array = None
            raise

        return self

    def reload(self):
        """Return a flushed clone of this image with the pixels loaded again from the file or URL"""
        flushed = self.clone(flush=True)
        return flushed.load()

    def isloaded(self):
        """Return True if `vipy.image.Image.load` was successful in reading the image, or if the pixels are present in `vipy.image.Image.array`."""
        return self._array is not None

    def loaded(self):
        """Alias for `vipy.image.Image.isloaded`"""
        return self._array is not None

    def is_loaded(self):
        """Alias for `vipy.image.Image.isloaded`"""
        return self._array is not None
    
    def isdownloaded(self):
        """Does the filename returned from `vipy.image.Image.filename` exist, meaning that the url has been downloaded to a local file?"""
        return self._filename is not None and os.path.exists(self._filename)

    def is_downloaded(self):
        """Alias for ``vipy.image.Image.isdownloaded`"""
        return self.isdownloaded()
    
    def downloadif(self, timeout=10, verbose=False):
        """Download URL to filename if the filename has not already been downloaded"""
        return self.download(timeout=timeout, verbose=verbose, cached=True) if self.hasurl() else self

    def try_download(self, timeout=10, verbose=False):
        """Attempt to download URL to filename if the filename has not already been downloaded, return object on failure.  Check `vipy.image.Image.is_downloaded` on returned object for success"""
        try:
            return self.downloadif(timeout=timeout, verbose=verbose)
        except:
            return self

    def try_load(self):
        """Attempt to load an image, return the object on failure.  Check `vipy.image.Image.is_loaded` on returned object for success"""
        try:
            return self.load()
        except:
            return self
        
    def channels(self):
        """Return integer number of color channels"""
        return self.load().channels() if not self.isloaded() else (1 if self._array.ndim==2 else self._array.shape[2])

    def iscolor(self):
        """Color images are three channel or four channel with transparency, float32 or uint8"""
        return self.channels() == 3 or self.channels() == 4

    def istransparent(self):
        """Transparent images are four channel color images with transparency, float32 or uint8.  Return true if this image contains an alpha transparency channel"""
        return self.channels() == 4

    def blend(self, im, alpha):
        """alpha blend self and im in-place, such that self = alpha*self + (1-alpha)*im"""
        assert isinstance(im, Image)
        assert alpha >=0 and alpha <= 1
        assert self.colorspace() not in ['float','rgba','bgra'], "convert to rgb first"
        return self.load().map(lambda arr: np.uint8(alpha * arr + (1-alpha)*im.clone().load()._to_colorspace(self.colorspace()).resize_like(self).array()))
                
    def isgrey(self):
        """Grey images are one channel, float32"""
        return self.channels() == 1 and self.array().dtype == np.float32

    def isluminance(self):
        """Luninance images are one channel, uint8"""
        return self.channels() == 1 and self.array().dtype == np.uint8

    def filesize(self):
        """Return size of underlying image file, requires fetching metadata from filesystem"""
        assert self.hasfilename(), 'Invalid image filename'
        return os.path.getsize(self._filename)

    def width(self):
        """Return the width (columns) of the image in integer pixels.
        
        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.load().array().shape[1]

    def height(self):
        """Return the height (rows) of the image in integer pixels.
        
        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """        
        return self.load().array().shape[0]
    
    def shape(self):
        """Return the (height, width) or equivalently (rows, cols) of the image.
        
        Returns:
            A tuple (height=int, width=int) of the image.

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return (self.load().height(), self.width())

    def aspectratio(self):
        """Return the aspect ratio of the image as (width/height) ratio.

        Returns:
            A float equivalent to (`vipy.image.Image.width` / `vipy.image.Image.height`)

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.load().width() / float(self.height())

    def area(self):
        """Return the area of the image as (width * height).

        Returns:
            An integer equivalent to (`vipy.image.Image.width` * `vipy.image.Image.height`)

        .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
        """
        return self.width()*self.height()
    
    def centroid(self):
        """Return the real valued center pixel coordinates of the image (col=x,row=y).
        
        The centroid is equivalent to half the `vipy.image.Image.shape`.

        Returns:
            A tuple (column, row) of the floating point center of the image.
        """
        return (self.load().width() / 2.0, self.height() / 2.0)

    def centerpixel(self):
        """Return the integer valued center pixel coordinates of the image (col=i,row=j)

        The centerpixel is equivalent to half the `vipy.image.Image.shape` floored to the nearest integer pixel coordinate.

        Returns:
            A tuple (int(column), int(row)) of the integer center of the image.
        """
        c = np.round(self.centroid())
        return (int(c[0]), int(c[1]))
    
    def array(self, np_array=None, copy=False):
        """Replace self._array with provided numpy array

        Args:
            np_array: [numpy array] A new array to use as the pixel buffer for this image.
            copy: [bool] If true, copy the buffer using np.copy(), else use a reference to this buffer.

        Returns:
            - If np_array is not None, return the `vipy.image.Image` object such that this object points to the provided numpy array as the pixel buffer
            - If np_array is None, then return the numpy array.

        .. notes:: 
            - If copy=False, then this `vipy.image.Image` object will share the pixel buffer with the owner of np_array.  Changes to pixels in this buffer will be shared.  
            - If copy=True, then this will significantly slow down processing for large images.  Use referneces wherevery possible.
        """
        if np_array is None:
            return self._array if copy is False else np.copy(self._array)
        elif isnumpyarray(np_array):
            self._array = np.copy(np_array) if copy else np_array  # reference or copy
            assert self._array.dtype == np.float32 or self._array.dtype == np.uint8, "Invalid input - array() must be type uint8 or float32 and not type='%s'" % (str(self._array.dtype))                        
            self.colorspace(None)  # must be set with colorspace() after array() but before _to_colorspace()
            return self
        else:
            raise ValueError('Invalid input - array() must be numpy array and not "%s"' % (str(type(np_array))))

    def fromarray(self, data):
        """Alias for `vipy.image.Image.array` with copy=True.  The pixel buffer is set to a numpy copy of data."""
        return self.array(np_array=data, copy=True)
    
    def tonumpy(self):
        """Alias for `vipy.image.Image.numpy"""
        return self.numpy()

    def numpy(self):
        """Return a mutable numpy array for this `vipy.image.Image`.

        .. notes:: 
            - This will always return a writeable array with the 'WRITEABLE' numpy flag set.  This is useful for returning a mutable numpy array as needed while keeping the original non-mutable numpy array (e.g. loaded from a video or PIL) as the underlying pixel buffer for efficiency reasons.
            - Triggers a `vipy.image.Image.load` if the pixel buffer has not been loaded
            - This will trigger a copy if the ['WRITEABLE' flag](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html) is not set. 
        """        
        self.load()
        self._array = np.copy(self._array) if not self._array.flags['WRITEABLE'] else self._array  # triggers copy         
        return self._array

    def channel(self, k=None):
        """Return a cloned Image() object for the kth channel, or return an iterator over channels if k=None.

        Iterate over channels as single channel luminance images:

        ```python
        for c in self.channel():
            print(c)
        ```

        Return the kth channel as a single channel luminance image:

        ```python
        c = self.channel(k=0)
        ```

        """
        if k is None:
            return [self.channel(j) for j in range(0, self.channels())]
        elif k == 0 and self.channels() == 1:
            return self
        else:
            assert k < self.channels() and k>=0, "Requested channel=%d must be within valid channels=%d" % (k, self.channels())
            im = self.clone().load()
            im._array = im._array[:,:,k]
            im._colorspace = 'lum'
            return im

    def channelmean(self):
        """Return a cloned Image() object for the mean of all channels followed by returning a single channel float image.

        This is useful for visualizing multichannel images by reducing the channels to one

        ```python
        vipy.image.Image(array=np.random.rand(3,3,16).astype(np.float32)).channelmean().mat2gray().lum().show()
        ```
        
        """
        im = self.clone().load()
        im._array = np.mean(im._array, axis=2, keepdims=True)
        im._colorspace = 'float'
        return im
        
    def red(self):
        """Return red channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':
        
        ```python
        self.red() == self.channel(0) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.red() == self.channel(3) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(0)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(3)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())

    def green(self):
        """Return green channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':

        ```python
        self.green() == self.channel(1) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.green() == self.channel(1) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be three channel color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(1)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(1)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())

    def blue(self):
        """Return blue channel as a cloned single channel `vipy.image.Image` object.

        These are equivalent operations if the colorspace is 'rgb' or 'rgba':

        ```python
        self.vlue() == self.channel(2) 
        ```

        These are equivalent operations if the colorspace is 'bgr' or 'bgra':

        ```python
        self.blue() == self.channel(0) 
        ```

        .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
        """
        assert self.channels() >= 3, "Must be three channel color image"
        if self.colorspace() in ['rgb', 'rgba']:
            return self.channel(2)
        elif self.colorspace() in ['bgr', 'bgra']:
            return self.channel(0)
        else:
            raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())                

    def alpha(self):
        """Return alpha (transparency) channel as a cloned single channel `vipy.image.Image` object"""
        assert self.channels() == 4 and self.colorspace() in ['rgba', 'bgra'], "Must be four channnel color image"
        return self.channel(3)
        
    def zeros(self):
        """Set the pixel buffer to all zeros of the same shape and datatype as this `vipy.image.Image` object.
        
        These are equivalent operations for the resulting buffer shape: 
        
        ```python
        import numpy as np
        np.zeros( (self.width(), self.height(), self.channels()) ) == self.zeros().array()
        ```

        Returns:
           This `vipy.image.Image` object.

        .. note:: Triggers load() if the pixel buffer has not been loaded yet.
        """
        self._array = 0*self.load()._array
        return self

    def pil(self):
        """Convert vipy.image.Image to PIL Image.
        
        Returns:
            A [PIL image](https://pillow.readthedocs.io/en/stable/reference/Image.html) object, that shares the pixel buffer by reference
        """
        if self.isloaded():
            assert self.channels() in [1,3,4] and (self.channels() == 1 or self.colorspace() != 'float'), "Incompatible with PIL"
            return PIL.Image.fromarray(self.numpy(), mode='RGB' if self.colorspace()=='rgb' else None)  # FIXME: mode='RGB' triggers slow tobytes() conversion, need RGBA or RGBX
        elif self.hasfilename():
            return PIL.Image.open(self.filename())
        else:
            return None
            
    def blur(self, sigma=3):
        """Apply a Gaussian blur with Gaussian kernel radius=sigma to the pixel buffer.
        
        Args:
            sigma: [float >=0] The gaussian blur kernel radius.

        Returns:
            This `vipy.image.Image` object with the pixel buffer blurred in place.
        """
        assert sigma >= 0
        return self.array(np.array(self.pil().filter(PIL.ImageFilter.GaussianBlur(radius=sigma)))) if sigma>0 else self
        
    def torch(self, order='CHW'):
        """Convert the batch of 1 HxWxC images to a CxHxW torch tensor.

        Args:
            order: ['CHW', 'HWC', 'NCHW', 'NHWC'].  The axis order of the torch tensor (channels, height, width) or (height, width, channels) or (1, channels, height, width) or (1, height, width, channels)

        Returns:
            A CxHxW or HxWxC or 1xCxHxW or 1xHxWxC [torch tensor](https://pytorch.org/docs/stable/tensors.html) that shares the pixel buffer of this image object by reference.

        .. note:: This supports numpy types and does not support bfloat16
        """
        from torch import from_numpy;  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow)
        
        assert order in ['CHW', 'HWC', 'NCHW', 'NHWC']
        img = self.numpy() if self.array().ndim >= 3 else np.expand_dims(self.array(), 2)  # HxW -> HxWx1 
        
        if order in ['CHW']:
            assert img.ndim == 3, "invalid array"  
            img = img.transpose(2,0,1) # HxWxC -> CxHxW
        elif order in ['NCHW']:
            img = img.transpose(3,2,0,1) if img.ndim == 4 else np.expand_dims(img.transpose(2,0,1), 0)
        if order in ['NHWC']:
            img = img.transpose(3,0,1,2) if img.ndim == 4 else np.expand_dims(img, 0)
        return from_numpy(img)   # pip install torch

    
    @staticmethod
    def from_torch(x, order='CHW'):
        """Convert a 1xCxHxW, CxHxW or NxCxHxW torch tensor (or numpy array with torch channel order) to HxWxC numpy array, returns new `vipy.image.Image` with inferred colorspace corresponding to data type in x"""
        from torch import Tensor, is_tensor;  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow) 
        assert isinstance(x, Tensor) or isinstance(x, np.ndarray), "Invalid input type '%s'- must be torch.Tensor" % (str(type(x)))
        assert x.ndim == 4 or x.ndim == 3, "Torch tensor must be shape 1xCxHxW, CxHxW, or NxCxHxW"
        x = x.squeeze(0) if (x.ndim == 4 and x.shape[0] == 1) else x

        if order == 'CHW':
            x = x.permute(1,2,0).cpu().detach().float().numpy() if is_tensor(x) else np.copy(x).transpose(1,2,0)   # CxHxW -> HxWxC, copied            
        elif order == 'WHC':
            x = x.permute(1,0,2).cpu().detach().float().numpy() if is_tensor(x) else np.copy(x).transpose(1,0,2)   # WxHxC -> HxWxC, copied        
        elif order == 'HWC':
            x = x.cpu().detach().float().numpy() if is_tensor(x) else np.copy(x)  # HxWxC -> HxWxC, copied        
        elif order == 'NCHW':
            assert x.ndim == 4, "invalid shape"
            x = x.permute(2,3,1,0).cpu().detach().float().numpy()  # NxCxHxW -> HxWxCxN, copied        
        else:
            raise ValueError('unknown axis order "%s"' % order)

        img = x
        colorspace = 'float' if img.dtype == np.float32 else None
        colorspace = 'rgb' if img.dtype == np.uint8 and img.shape[2] == 3 else colorspace  # assumed
        colorspace = 'lum' if img.dtype == np.uint8 and img.shape[2] == 1 else colorspace
        return Image(array=img, colorspace=colorspace)

    @staticmethod
    def fromtorch(x, order='CHW'):
        """Alias for `vipy.image.Image.from_torch`"""
        return Image.from_torch(x, order)
    
    def unload(self):
        """Remove cached file and loaded array.  Note that this will delete the underlying file returned by filename() if there is a backing url, cleaning up cached files and forcing re-download"""
        if self.hasurl() and self.hasfilename():
            log.info('Removing "%s"'% self._filename)
            os.remove(self._filename)
            self._filename = None
        if self.isloaded():
            self.flush()
        return self

    def uncache(self):
        """Alias for `vipy.image.Image.unload`"""
        return self.unload()
    
    def filename(self, newfile=None):
        """Return or set image filename"""
        if newfile is None:
            return self._filename
        else:
            self._filename = newfile
            return self

    def clear_filename(self):
        """Remove the current filename from the object in-place and return the object"""        
        self._filename = None
        return self
    
    def url(self, url=None, username=None, password=None, sha1=None):
        """Image URL and URL download properties"""
        if url is not None:
            self._url = url  # this does not change anything else (e.g. the associated filename), better to use constructor 
        if username is not None:
            self.setattribute('url_username', username)
        if password is not None:
            self.setattribute('url_password', password)
        if sha1 is not None:
            self.setattribute('url_sha1', sha1)
        if url is None and username is None and password is None and sha1 is None:
            return self._url
        else:
            return self
    
    def colorspace(self, colorspace=None):
        """Return or set the colorspace as ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']"""
        if colorspace is None:
            return self._colorspace
        else:
            assert str(colorspace).lower() in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s'. Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum']" % colorspace 
            img = self.array()
            if self.isloaded():
                colorspace = str(colorspace).lower()
                if self.array().dtype == np.float32:
                    assert colorspace in ['float', 'grey', 'gray'], "Invalid colorspace '%s' for float32 array()" % colorspace
                elif self.array().dtype == np.uint8:
                    assert colorspace in  ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum'], "Invalid colorspace '%s' for uint8 array(). Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum']" % colorspace
                else:
                    raise ValueError('unupported array() datatype "%s".  Allowable is [np.float32, np.uint8]' % colorspace)  # should never get here as long as array() is used to set _array
                if self.channels() == 1:
                    assert colorspace in ['float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s; for single channel array.  Allowable is ['float', 'grey', 'gray', 'lum']" % colorspace
                elif self.channels() == 3:
                    assert colorspace in ['float', 'rgb', 'bgr', 'hsv'], "Invalid colorspace '%s; for three channel array. Allowable is ['float', 'rgb', 'bgr', 'hsv']" % colorspace
                elif self.channels() == 4:
                    assert colorspace in ['float', 'rgba', 'bgra'], "Invalid colorspace '%s; for four channel array. Allowable is ['float', 'rgba', 'bgra']" % colorspace                    
                elif colorspace != 'float':
                    raise ValueError("Invalid colorspace '%s' for image channels=%d, type=%s" % (colorspace, self.channels(), str(self.array().dtype)))
                if colorspace in ['grey', 'gray']:
                    assert self.max() <= 1 and self.min() >= 0, "Colorspace 'grey' image must be np.float32 in range [0,1].  Use colorspace 'lum' for np.uint8 in range [0,255], or colorspace 'float' for unconstrained np.float32 [-inf, +inf]"
                    colorspace = 'grey'  # standardize
            self._colorspace = str(colorspace).lower()
            return self

    def uri(self):
        """Return the URI of the image object, either the URL or the filename, raise exception if neither defined"""
        if self.hasurl():
            return self.url()
        elif self.hasfilename():
            return self.filename()
        else:
            raise ValueError('No URI defined')

    def set_attribute(self, key, value):
        """Set element self.attributes[key]=value"""
        if self.attributes is None:
            self.attributes = {key: value}
        else:
            self.attributes[key] = value
        return self
    
    def setattribute(self, key, value):
        return self.set_attribute(key, value)
        
    def setattributes(self, newattr):
        """Set many attributes at once by providing a dictionary to be merged with current attributes"""
        assert isinstance(newattr, dict), "New attributes must be dictionary"
        self.attributes.update(newattr)
        return self
    
    def getattribute(self, k):
        """Return the key k in the attributes dictionary (self.attributes) if present, else None"""        
        return self.get_attribute(k)

    def get_attribute(self, k):
        """Return the key k in the attributes dictionary (self.attributes) if present, else None"""        
        return self.attributes[k] if k in self.attributes else None        
    
    def clear_attributes(self):
        self.attributes = {}
        return self
    
    def hasattribute(self, key):
        return self.attributes is not None and key in self.attributes

    def delattribute(self, k):
        return self.del_attribute(k)
    
    def del_attribute(self, k):
        if k in self.attributes:
            self.attributes.pop(k)
        return self
        
    def delattributes(self, atts):
        """Remove each key in atts (passed through tolist) from self.attributes and return self"""
        for key in tolist(atts):
            self.delattribute(key)
        return self

    def append_attribute(self, key, value):
        """Append the value to attribute key, creating the key as an empty list if it does not exist"""
        if key not in self.attributes:
            self.attributes[key] = []
        self.attributes[key].append(value)
        return self
    
    def metadata(self, k=None):
        """Return metadata associated with this image, stored in the attributes dictionary"""
        return self.attributes if k is None else self.getattribute(k)
    
    def hasurl(self):
        """Synonym for `vipy.image.Image.has_url`"""
        return self.has_url()

    def has_url(self):
        """Return True if the image has a URL input source"""
        return self._url is not None
    
    def has_filename(self):
        """Return True if the image has a filename input source and this file exists"""
        return self._filename is not None and os.path.exists(self._filename)

    def hasfilename(self):
        """Synonym for `vipy.image.Image.has_filename`"""
        return self.has_filename()
    
    def clone(self, flushforward=False, flushbackward=False, flush=False, shallow=False, attributes=False, dereference=False):
        """Create deep copy of object, flushing the original buffer if requested and returning the cloned object.
        Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned 
        object which can be used for encoding and will be garbage collected.
        
            * flushforward: copy the object, and set the cloned object array() to None.  This flushes the video buffer for the clone, not the object
            * flushbackward:  copy the object, and set the object array() to None.  This flushes the video buffer for the object, not the clone.
            * flush:  set the object array() to None and clone the object.  This flushes the video buffer for both the clone and the object.
            * dereference: remove both the filename and URL (if present) in the cloned object, leaving only the buffer
        """
        if flush or (flushforward and flushbackward):
            self.flush()  # flushes buffer on object and clone
            im = copy.deepcopy(self)  # object and clone are flushed
        elif flushbackward:
            im = copy.deepcopy(self)  # propagates _array to clone
            self.flush()  # object flushed, clone not flushed
        elif flushforward:            
            array = self._array;
            self._array = None
            im = copy.deepcopy(self)   # does not propagate _array to clone
            self._array = array    # object not flushed
            im.flush()
        elif shallow:
            im = copy.copy(self)  # shallow copy
            im._array = np.asarray(self._array) if self._array is not None else None  # shared pixels            
        else:
            im = copy.deepcopy(self)
        if attributes:
            im.attributes = copy.deepcopy(self.attributes)
        if dereference:
            assert im._array is not None, "image buffer required"
            im._filename = None
            im._url = None
        return im

    def flush(self):
        """flush the image buffer in place, alias for self.clone(flush=True)"""        
        if not (self.hasfilename() or self.hasurl()):
            self.setattribute('__shape', (self.height(), self.width(), self.channels()))  # to load zeros
        self._array = None  # flushes buffer on object
        return self

        
    # Spatial transformations
    def resize(self, cols=None, rows=None, width=None, height=None, interp='bilinear', fast=False):
        """Resize the image buffer to (rows x cols).  If only one of rows or cols is provided, rescale the image maintaining aspect ratio.

        Args:
            cols [int]: output width in pixels, synonym for width (define only one of the two)
            rows [int]: output height in pixels, synonym for height (define only one of the two)
            width [int]: output width in pixels
            height [int]: output height in pixels
            interp [str]: interpolation name, converted by string_to_pil_interpolation
            fast [bool]: if True, pass reducing_gap=2 to the PIL resize (not used for 'float' colorspace images)

        Returns:
            This object with the buffer resized.  The buffer is unchanged if it already has the requested size.
        """
        assert not (cols is not None and width is not None), "Define either width or cols"
        assert not (rows is not None and height is not None), "Define either height or rows"
        rows = rows if height is None else height
        cols = cols if width is None else width
        if cols is None or rows is None:
            if cols is None:
                scale = float(rows) / float(self.height())
            else:
                scale = float(cols) / float(self.width())
            self.rescale(scale, interp=interp, fast=fast)  # forward the requested interpolation, do not fall back to the rescale defaults
        elif rows == self.height() and cols == self.width():
            return self  
        elif self.colorspace() == 'float':
            # 'float' images are resized one channel at a time and restacked
            self._array = np.dstack([np.array(im.pil().resize((cols, rows), string_to_pil_interpolation(interp))) for im in self.channel()])
        else:
            self._array = np.asarray(self.load().pil().resize((cols, rows), string_to_pil_interpolation(interp), reducing_gap=2 if fast else None))  
        return self

    def resize_like(self, im, interp='bilinear'):
        """Resize image buffer to be the same size as the provided vipy.image.Image()"""
        assert isinstance(im, Image), "Invalid input - Must be vipy.image.Image"
        return self.resize(im.width(), im.height(), interp=interp)
    
    def rescale(self, scale=1, interp='bilinear', fast=False):
        """Scale the image buffer by the given factor and return self - NOT idempotent.

        The new size is (round(scale*width), round(scale*height)).  A scale of exactly 1 leaves the buffer unchanged.
        """
        (height, width) = self.load().shape()
        if scale == 1:
            return self
        newsize = (int(np.round(scale * width)), int(np.round(scale * height)))  # PIL size is (width, height)
        if self.colorspace() == 'float':
            # 'float' images are resized one channel at a time and restacked
            channels = [np.asarray(im.pil().resize(newsize, string_to_pil_interpolation(interp))) for im in self.channel()]
            self._array = np.dstack(channels)
        else:
            resized = self.pil().resize(newsize, string_to_pil_interpolation(interp), reducing_gap=2 if fast else None)
            self._array = np.asarray(resized)
        return self

    def maxdim(self, dim=None, interp='bilinear'):
        """Rescale the image preserving aspect ratio so that the maximum dimension equals dim, or return the current maximum dimension if dim is None"""
        if dim is None:
            return max(self.shape())
        longest = np.maximum(self.height(), self.width())
        return self.rescale(float(dim) / float(longest), interp=interp)

    def mindim(self, dim=None, interp='bilinear'):
        """Rescale the image preserving aspect ratio so that the minimum dimension equals dim, or return the current minimum dimension if dim is None"""
        shortest = np.minimum(self.height(), self.width())
        if dim is None:
            return shortest
        return self.rescale(float(dim) / float(shortest), interp=interp)

    def mindimn(self, dim=None):
        """Frequently used shortcut for mindim(dim, interp='nearest'), i.e. mindim with nearest neighbor interpolation"""
        return self.mindim(dim, interp='nearest')
    
    def _pad(self, dx, dy, mode='edge'):
        """Pad the image with numpy.pad using the given mode: dx columns on the left and right, dy rows on the top and bottom.  A channel axis, if present, is not padded"""
        img = self.load().array()
        if img.ndim == 3:
            widths = ((dy, dy), (dx, dx), (0, 0))
        else:
            widths = ((dy, dy), (dx, dx))
        self._array = np.pad(img, widths, mode=mode)
        return self

    def pad(self, padwidth, padheight):
        """Alias for `vipy.image.Image.zeropad`, arguments are passed through unchanged"""
        return self.zeropad(padwidth, padheight)
    
    def zeropad(self, padwidth, padheight):
        """Pad the image with zeros using np.pad in constant mode.

        Args:
            padwidth: number of columns added on both left and right, or a tuple (left, right) for different pre/post padding
            padheight: number of rows added on both top and bottom, or a tuple (top, bottom) for different pre/post padding

        Returns:
            This object, with the buffer padded when any padding is non-zero
        """
        padwidth = padwidth if isinstance(padwidth, tuple) else (padwidth, padwidth)
        padheight = padheight if isinstance(padheight, tuple) else (padheight, padheight)

        multichannel = self.channels() > 1 or self._array.ndim == 3
        pad_shape = (padheight, padwidth, (0, 0)) if multichannel else (padheight, padwidth)  # never pad the channel axis

        assert all(x >= 0 for x in padheight) and all(x >= 0 for x in padwidth), "padding must be positive"
        if any(p > 0 for p in (padwidth[0], padwidth[1], padheight[0], padheight[1])):
            # this is still slow due to the required copy, but fast-ish in np >= 1.17
            self._array = np.pad(self.load().array(), pad_width=pad_shape, mode='constant', constant_values=0)

        return self

    def zeropadlike(self, width, height):
        """Zero pad the image, balancing the border, so that the resulting image size is (width, height).  An odd difference puts the extra pixel on the right/bottom"""
        assert width >= self.width() and height >= self.height(), "Invalid input - final (width=%d, height=%d) must be greater than current image size (width=%d, height=%d)" % (width, height, self.width(), self.height())
        half_dw = (width - self.width()) / 2
        half_dh = (height - self.height()) / 2
        leftright = (int(np.floor(half_dw)), int(np.ceil(half_dw)))
        topbottom = (int(np.floor(half_dh)), int(np.ceil(half_dh)))
        return self.zeropad(leftright, topbottom)
                            
    def meanpad(self, padwidth, padheight, mu=None):
        """Pad each channel of the image with a constant using np.pad, by default the mean of that channel.

        Args:
            padwidth: number of columns added on both left and right, or a tuple (left, right) for different pre/post padding
            padheight: number of rows added on both top and bottom, or a tuple (top, bottom) for different pre/post padding
            mu: per-channel pad values, one per channel.  If None, use meanchannel()

        Returns:
            This object with the padded channels restacked (singleton dimensions squeezed)
        """
        padwidth = padwidth if isinstance(padwidth, tuple) else (padwidth, padwidth)
        padheight = padheight if isinstance(padheight, tuple) else (padheight, padheight)
        assert all(x >= 0 for x in padheight) and all(x >= 0 for x in padwidth), "padding must be positive"
        mu = self.meanchannel() if mu is None else mu
        padded = [np.pad(img, pad_width=(padheight, padwidth), mode='constant', constant_values=c)
                  for (img, c) in zip(self.channel(), mu)]
        self._array = np.squeeze(np.dstack(padded))
        return self

    def alphapad(self, padwidth, padheight):
        """Pad an RGBA image with all-zero pixels (zero alpha).  padwidth and padheight are a scalar for both sides or a (pre, post) tuple, as in meanpad"""
        assert self.colorspace() == 'rgba', "Colorspace must be RGBA for padding with transparency"
        transparent = np.array([0,0,0,0])  # one pad value per RGBA channel
        return self.meanpad(padwidth, padheight, mu=transparent)
    
    def minsquare(self):
        """Crop image of size (HxW) to (min(H,W), min(H,W)), keeping upper left corner constant"""
        side = int(np.min(self.load().shape()))
        return self._crop(BoundingBox(xmin=0, ymin=0, width=side, height=side))

    def maxsquare(self, S=None):
        """Crop image of size (HxW) to (max(H,W), max(H,W)) with zeropadding, or to (S,S) if S is provided, keeping upper left corner constant"""
        S = int(S) if S is not None else np.max(self.load().shape())
        (H, W) = self.shape()
        if S == W and S == H:
            return self  # already (S,S)
        self._crop(BoundingBox(0, 0, width=min(W, S), height=min(H, S)))
        (dW, dH) = (max(0, S - W), max(0, S - H))
        if dW > 0 or dH > 0:
            self.zeropad((0,dW), (0,dH))  # crop then zeropad on the right/bottom only
        return self

    def maxmatte(self):
        """Zeropad image of size (HxW) to (max(H,W), max(H,W)) with balanced zeropadding forming a letterbox with top/bottom matte or pillarbox with left/right matte.

        When the difference between the dimensions is odd, the extra pixel of padding is added on the right/bottom
        so that the result is exactly square.
        """
        S = np.max(self.load().shape())
        dW = int(S - self.width())
        dH = int(S - self.height())
        # split as (d//2, d - d//2) so both sides always sum to d, including odd d
        padwidth = (dW // 2, dW - dW // 2)
        padheight = (dH // 2, dH - dH // 2)
        return self.zeropad(padwidth, padheight)._crop(BoundingBox(0, 0, width=int(S), height=int(S)))
    
    def centersquare(self):
        """Crop image of size (NxN) in the center, such that N=min(width,height), keeping the image centroid constant"""
        N = int(np.min(self.shape()))
        (cx, cy) = (float(self.width() / 2.0), float(self.height() / 2.0))
        return self._crop(BoundingBox(xcentroid=cx, ycentroid=cy, width=N, height=N))

    def centercrop(self, height, width):
        """Crop image of size (height x width) in the center, keeping the image centroid constant"""
        (cx, cy) = (float(self.width() / 2.0), float(self.height() / 2.0))
        box = BoundingBox(xcentroid=cx, ycentroid=cy, width=int(width), height=int(height))
        return self._crop(box)

    def cornercrop(self, height, width):
        """Crop image of size (height x width) from the upper left corner"""
        box = BoundingBox(xmin=0, ymin=0, width=int(width), height=int(height))
        return self._crop(box)
    
    def _crop(self, bbox):
        """Crop the image buffer using the supplied bounding box object, clipping the box to the image rectangle.

        If the box is degenerate or does not overlap the image, a warning is logged and the buffer is left unchanged.
        """
        assert isinstance(bbox, BoundingBox) and bbox.valid(), "Invalid input - Must be vipy.geometry.BoundingBox not '%s'" % (str(type(bbox)))
        if bbox.isdegenerate() or not bbox.hasoverlap(self.load().array()):
            log.warning('BoundingBox for crop() does not intersect image rectangle')
            return self
        bbox = bbox.imclip(self.load().array()).int()
        (rows, cols) = (slice(bbox.ymin(), bbox.ymax()), slice(bbox.xmin(), bbox.xmax()))
        self._array = self.array()[rows, cols]
        return self

    def crop(self, bbox):
        """Crop the image buffer using the supplied bounding box, delegates to _crop(bbox)"""
        return self._crop(bbox)
    
    def fliplr(self):
        """Mirror the image buffer about the vertical axis (np.fliplr) and return self - Not idempotent"""
        mirrored = np.fliplr(self.load().array())
        self._array = mirrored
        return self

    def flipud(self):
        """Mirror the image buffer about the horizontal axis (np.flipud) and return self - Not idempotent"""
        mirrored = np.flipud(self.load().array())
        self._array = mirrored
        return self
    
    def imagebox(self):
        """Return the bounding box for the image rectangle, with upper left corner (0,0) and the width and height of this image"""
        (w, h) = (int(self.width()), int(self.height()))
        return BoundingBox(xmin=0, ymin=0, width=w, height=h)

    def border_mask(self, pad):
        """Return a binary uint8 array the same size as self (height x width), with a border of pad pixels around the edge set to one.

        Args:
            pad [int]: border thickness in pixels.  If pad <= 0, the mask is all zeros.

        Returns:
            np.uint8 array of shape (height, width) with ones in the border and zeros elsewhere
        """
        img = np.zeros( (self.height(), self.width()), dtype=np.uint8)
        if pad > 0:  # guard: for pad=0 the slice [-0:] would select the whole image
            img[0:pad,:] = 1
            img[-pad:,:] = 1
            img[:,0:pad] = 1
            img[:,-pad:] = 1
        return img
    
    # Color conversion
    def _to_colorspace(self, to):
        """Convert the image buffer in place from the current colorspace to the colorspace `to` and return self.

        Supported colorspaces are rgb, rgba, bgr, bgra, hsv, grey, lum, float.  The name 'gray' is accepted and standardized to 'grey'.

        Conversion to 'float' is a typecast of the buffer to float32.  All other conversions are routed through 'rgb'
        (or 'rgba' for 'bgra'): the buffer is first converted to uint8 RGB and this method recurses to reach the target.
        Converting RGB to RGBA or BGRA appends a fully opaque (255) alpha channel.

        Raises:
            ValueError: if the current colorspace is None or unsupported, or if a 'float' image does not have 1, 2 or 3 channels
        """
        to = to if to != 'gray' else 'grey'  # standardize 'gray' -> 'grey' internally
        self.load()
        if self.colorspace() == to:
            return self
        elif to == 'float':
            img = self.load().array()  # any type
            self._array = np.array(img).astype(np.float32)  # typecast to float32
        elif self.colorspace() == 'lum':
            img = self.load().array()  # single channel, uint8 [0,255]
            assert img.dtype == np.uint8
            img = np.squeeze(img, axis=2) if img.ndim == 3 and img.shape[2] == 1 else img  # remove singleton channel            
            self._array = np.array(PIL.Image.fromarray(img, mode='L').convert('RGB'))  # uint8 luminance [0,255] -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() in ['gray', 'grey']:
            img = self.load().array()  # single channel float32 [0,1]
            img = np.squeeze(img, axis=2) if img.ndim == 3 and img.shape[2] == 1 else img  # remove singleton channel                        
            self._array = np.array(PIL.Image.fromarray(255.0 * img, mode='F').convert('RGB'))  # float32 gray [0,1] -> float32 gray [0,255] -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'rgba':
            img = self.load().array()  # uint8 RGBA
            if to == 'bgra':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGBA -> uint8 ABGR
                self._array = self._array[:,:,[1,2,3,0]]  # uint8 ABGR -> uint8 BGRA
            elif to == 'rgb':
                self._array = self._array[:,:,0:-1]  # uint8 RGBA -> uint8 RGB
            else:
                self._array = self._array[:,:,0:-1]  # uint8 RGBA -> uint8 RGB
                self.colorspace('rgb')
                self._to_colorspace(to)
        elif self.colorspace() == 'rgb':
            img = self.load().array()  # uint8 RGB
            if to in ['grey', 'gray']:
                self._array = (1.0 / 255.0) * np.array(PIL.Image.fromarray(img).convert('L')).astype(np.float32)  # uint8 RGB -> float32 Grey [0,255] -> float32 Grey [0,1]
            elif to == 'bgr':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGB -> uint8 BGR
            elif to == 'hsv':
                self._array = np.array(PIL.Image.fromarray(img).convert('HSV'))  # uint8 RGB -> uint8 HSV
            elif to == 'lum':
                self._array = np.array(PIL.Image.fromarray(img).convert('L'))  # uint8 RGB -> uint8 Luminance (integer grey)
            elif to == 'rgba':
                self._array = np.dstack((img, 255*np.ones((img.shape[0], img.shape[1]), dtype=np.uint8)))  # uint8 RGB -> uint8 RGBA, opaque alpha
            elif to == 'bgra':
                self._array = np.array(img)[:,:,::-1]  # uint8 RGB -> uint8 BGR
                # opaque alpha, consistent with the 'rgba' conversion above (so that rgb->bgra == rgb->rgba->bgra)
                self._array = np.dstack((self._array, 255*np.ones((img.shape[0], img.shape[1]), dtype=np.uint8)))  # uint8 BGR -> uint8 BGRA
        elif self.colorspace() == 'bgr':
            img = self.load().array()  # uint8 BGR
            self._array = np.array(img)[:,:,::-1]  # uint8 BGR -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'bgra':
            img = self.load().array()  # uint8 BGRA
            self._array = np.array(img)[:,:,::-1]  # uint8 BGRA -> uint8 ARGB
            self._array = self._array[:,:,[1,2,3,0]]  # uint8 ARGB -> uint8 RGBA
            self.colorspace('rgba')
            self._to_colorspace(to)
        elif self.colorspace() == 'hsv':
            img = self.load().array()  # uint8 HSV
            self._array = np.array(PIL.Image.fromarray(img, mode='HSV').convert('RGB'))  # uint8 HSV -> uint8 RGB
            self.colorspace('rgb')
            self._to_colorspace(to)
        elif self.colorspace() == 'float':
            img = self.load().array()  # float32
            if np.max(img) > 1 or np.min(img) < 0:
                # out of range float images are rescaled with self.mat2gray() into the range float32 [0,1]
                img = self.mat2gray().array()
            if not self.channels() in [1,2,3]:
                raise ValueError('Float image must be single channel or three channel RGB in the range float32 [0,1] prior to conversion')
            if self.channels() == 3:  # assumed RGB
                self._array = np.uint8(255 * self.array())   # float32 RGB [0,1] -> uint8 RGB [0,255]
                self.colorspace('rgb')
            else:
                img = np.squeeze(img, axis=2) if img.ndim == 3 else img
                self._array = (1.0 / 255.0) * np.array(PIL.Image.fromarray(np.uint8(255 * img)).convert('L')).astype(np.float32)  # float32 [0,1] -> uint8 [0,255] -> float32 gray [0,1]
                self.colorspace('grey')
            self._to_colorspace(to)
        elif self.colorspace() is None:
            raise ValueError('Colorspace must be initialized by constructor or colorspace() to allow for colorspace conversion')
        else:
            raise ValueError('unsupported colorspace "%s"' % self.colorspace())

        self.colorspace(to)
        return self

    def affine_transform(self, A, border='zero'):
        """Apply a 3x3 affine geometric transformation to the image. 

        Args:        
            - A [np.ndarray]: 3x3 affine geometric transform from `vipy.geometry.affine_transform`
            - border [str]:  'zero' or 'replicate' to handle elements outside the image rectangle after transformation

        Returns:
            - This object with only the array transformed

        Raises:
            - AssertionError: if A is not a numpy array of shape (3,3)

        .. note:: The image will be loaded and converted to float() prior to applying the affine transformation.  
        .. note:: This will transform only the pixels, not objects
        """
        assert isnumpy(A), "invalid input"
        assert A.shape == (3,3), "The affine transformation matrix should be the output of vipy.geometry.affine_transform"
        self._array = vipy.geometry.imtransform(self.load().float().array(), A.astype(np.float32), border=border)
        return self

    def rotate(self, r):
        """Apply a rotation in radians to the pixels, with origin in upper left.

        The rotation matrix is constructed by vipy.geometry.affine_transform(r=r) and applied with affine_transform() using its default border.
        """
        return self.affine_transform(vipy.geometry.affine_transform(r=r))

    def rotate_by_exif(self):
        """Apply the flip/rotation specified by the 'Orientation' field of the EXIF metadata and return self.

        A missing orientation or an orientation of 1 leaves the image unchanged.

        Raises:
            ValueError: if the orientation is not one of the values 1-8
        """
        exif = self.exif()
        orientation = exif['Orientation'] if 'Orientation' in exif else None
        if orientation is None or orientation == 1:
            return self

        # (EXIF orientation code, transform to apply), checked in order
        transforms = ((2, lambda im: im.fliplr()),
                      (3, lambda im: im.flipud().fliplr()),
                      (4, lambda im: im.flipud()),
                      (5, lambda im: im.rot90cw().fliplr()),
                      (6, lambda im: im.rot90cw()),
                      (7, lambda im: im.rot90ccw().fliplr()),
                      (8, lambda im: im.rot90ccw()))
        for (code, transform) in transforms:
            if orientation == code:
                return transform(self)
        raise ValueError
    
    def rgb(self):
        """Convert the image buffer to three channel RGB uint8 colorspace in place and return this object"""
        return self._to_colorspace('rgb')

    def color_transform(self, colorspace):
        """Transform the image buffer in place from the current `vipy.image.Image.colorspace` to the provided colorspace, and return this object"""
        return self._to_colorspace(colorspace)
    
    def colorspace_like(self, im):
        """Convert the image buffer in place to have the same colorspace as the provided image, which must be a vipy.image.Image"""
        assert isinstance(im, vipy.image.Image)
        return self._to_colorspace(im.colorspace())
    
    def rgba(self):
        """Convert the image buffer to four channel RGBA uint8 colorspace in place and return this object"""
        return self._to_colorspace('rgba')

    def hsv(self):
        """Convert the image buffer to three channel HSV uint8 colorspace in place and return this object"""
        return self._to_colorspace('hsv')

    def bgr(self):
        """Convert the image buffer to three channel BGR uint8 colorspace in place and return this object"""
        return self._to_colorspace('bgr')

    def bgra(self):
        """Convert the image buffer to four channel BGRA uint8 colorspace in place and return this object"""
        return self._to_colorspace('bgra')

    def float(self):
        """Convert the image buffer to float32 ('float' colorspace) in place and return this object"""
        return self._to_colorspace('float')

    def greyscale(self):
        """Convert the image buffer to single channel grayscale float32 in range [0,1] in place and return this object"""
        return self._to_colorspace('gray')

    def grayscale(self):
        """Alias for greyscale(): convert to single channel float32 greyscale"""
        return self.greyscale()

    def grey(self):
        """Alias for greyscale(): convert to single channel float32 greyscale"""
        return self.greyscale()

    def gray(self):
        """Alias for greyscale(): convert to single channel float32 greyscale"""
        return self.greyscale()

    def luminance(self):
        """Convert the image buffer in place to single channel uint8 in range [0,255] corresponding to the luminance component, and return this object"""
        return self._to_colorspace('lum')

    def lum(self):
        """Alias for luminance(): convert to the single channel uint8 'lum' colorspace"""
        return self._to_colorspace('lum')

    def _apply_colormap(self, cm):
        """Convert an image to greyscale, then convert to RGB image with matplotlib colormap.

        The colormap name cm is resolved with plt.get_cmap, see https://matplotlib.org/tutorials/colors/colormaps.html
        """
        colormap = plt.get_cmap(cm)
        grey = self.grey().numpy()
        mapped = colormap(grey)
        self._array = np.uint8(255 * mapped[:,:,:3])  # keep the first three channels of the colormap output
        self.colorspace('rgb')
        return self

    def jet(self):
        """Apply the matplotlib 'jet' colormap to the greyscale image and convert to RGB"""
        return self._apply_colormap('jet')

    def rainbow(self):
        """Apply the matplotlib 'gist_rainbow' colormap to the greyscale image and convert to RGB"""
        return self._apply_colormap('gist_rainbow')

    def hot(self):
        """Apply the matplotlib 'hot' colormap to the greyscale image and convert to RGB"""
        return self._apply_colormap('hot')

    def bone(self):
        """Apply the matplotlib 'bone' colormap to the greyscale image and convert to RGB"""
        return self._apply_colormap('bone')

    def saturate(self, min, max):
        """Clip the image buffer elementwise to the range [min,max].  The types of min/max should match the type of the buffer"""
        floored = np.maximum(self.load().array(), min)
        return self.array(np.minimum(floored, max))

    def intensity(self):
        """Convert image to float32 and map the pixel range [min,max] to [0,1], forcing the colorspace to 'float'.

        The result is (array - min) / (max - min).  A constant image (max == min) is mapped to all zeros
        to avoid a division by zero.

        Returns:
            This object with a float buffer
        """
        img = self.load().float().array()
        (lo, hi) = (float(self.min()), float(self.max()))
        extent = hi - lo
        img = img - lo  # shift so that the minimum is zero
        self.array(img / extent if extent > 0 else img)
        return self.colorspace('float')

    def mat2gray(self, min=None, max=None):
        """Convert the image buffer so that [min,max] -> [0,1], forces conversion to 'float' colorspace.  This does not change the number of color channels.

        The float32 buffer, min and max are passed through to the module level mat2gray() utility.
        """
        self.array(mat2gray(np.float32(self.load().float().array()), min, max))
        return self.colorspace('float')

    def sum_to_one(self, eps=1E-6):
        """Return float image in the range [0,1] such that all elements sum to one.  eps is added to the sum to avoid division by zero"""
        total = self.mat2gray().sum()
        return self.gain(1.0 / (eps + total))
    
    def gain(self, g):
        """Elementwise multiply gain to image array.  Gain should be a scalar or an array broadcastable to array().

        This forces the colorspace to 'float', except for a scalar gain of exactly 1 which returns this object unchanged.
        Native numpy multiply is used (it is faster than a numba optimized gain).
        """
        if np.ndim(g) == 0 and g == 1:  # only scalars are compared, an array gain has no single truth value
            return self
        return self.array(np.multiply(self.load().float().array(), g)).colorspace('float')

    def bias(self, b):
        """Add a bias to the image array.  Bias should be broadcastable to array().  This forces the colorspace to 'float'"""
        shifted = self.load().float().array() + b
        self.array(shifted)
        return self.colorspace('float')

    def normalize(self, gain, bias):
        """Apply a multiplicative gain and additive bias, such that the new array is gain*self.array() + bias.

        This is useful for applying a normalization of an image prior to calling `vipy.image.Image.torch`.

        The following operations are equivalent.

        ```python
        im = vipy.image.RandomImage()
        im.normalize(1/255.0, -0.5) == im.gain(1/255.0).bias(-0.5)
        ```
        
        .. note:: This will force the colorspace to 'float'
        """
        normalized = gain*self.load().float().array() + bias
        return self.array(normalized).colorspace('float')

    def additive_noise(self, hue=(-15,15), saturation=(-15,15), brightness=(-15,15)):
        """Apply uniform random additive noise in the given (low, high) range to each of the HSV color channels.

        The image is converted to HSV prior to applying noise, and the noisy result is clipped to [0,255] and stored as uint8.
        """
        for bounds in (hue, saturation, brightness):
            assert isinstance(bounds, tuple) and len(bounds) == 2 and bounds[1]>=bounds[0]

        (H, W, _) = (self.height(), self.width(), self.channels())
        # one uniform noise plane per HSV channel, drawn in the order hue, saturation, brightness
        planes = [(hi-lo)*np.random.rand(H,W)+lo for (lo, hi) in (hue, saturation, brightness)]
        noise = np.dstack(tuple(planes))
        noisy = np.minimum(np.maximum(self.hsv().array() + noise, 0), 255)
        return self.array(noisy.astype(np.uint8))
            
    # Image statistics
    def stats(self):
        """Log (at info level) this object followed by its channel count, shape, min, max, mean and per-channel mean"""
        log.info(self)
        log.info('  Channels: %d' % self.channels())
        summary = (('  Shape: %s', self.shape),
                   ('  min: %s', self.min),
                   ('  max: %s', self.max),
                   ('  mean: %s', self.mean),
                   ('  channel mean: %s', self.meanchannel))
        for (fmt, statistic) in summary:
            log.info(fmt % str(statistic()))
    
    def min(self):
        """Alias for minpixel(): minimum over all elements of the image buffer"""
        return self.minpixel()

    def minpixel(self):
        """Return the minimum over all elements of the (loaded) image buffer"""
        pixels = self.load().array().flatten()
        return np.min(pixels)
    
    def max(self):
        """Alias for maxpixel(): maximum over all elements of the image buffer"""
        return self.maxpixel()

    def maxpixel(self):
        """Return the maximum over all elements of the (loaded) image buffer"""
        pixels = self.load().array().flatten()
        return np.max(pixels)
    
    def mean(self):
        """Mean over all elements of the (loaded) image buffer"""
        pixels = self.load().array().flatten()
        return np.mean(pixels)

    def meanchannel(self, k=None):
        """Mean per channel over all pixels (averaged over the first two axes).  If channel k is provided, return just the mean for that channel"""
        channel_means = np.mean(self.load().array(), axis=(0, 1)).flatten()
        if k is None:
            return channel_means
        return channel_means[k]
    
    def sum(self):
        """Return the sum over all elements of the (loaded) image buffer"""
        pixels = self.load().array().flatten()
        return np.sum(pixels)

    # Image visualization
    def closeall(self):
        """Close all open figure windows using vipy.show.closeall() and return this object"""
        vipy.show.closeall()
        return self
    
    def close(self, fignum=None):
        """Close the requested figure number, or close all figures if fignum=None.  Returns this object"""
        if fignum is None:
            return self.closeall()
        vipy.show.close(fignum)
        return self
    
    def show(self, figure=1, nowindow=False, timestamp=None, mutator=None, theme='dark'):
        """Display image on screen in provided figure number and return this object.

        A clone of the image is converted to RGB colorspace for display, so this object is not modified by the conversion.
        If a mutator is provided, it is applied to the clone and its result is shown.  The theme selects the timestamp
        colors: white text on black for 'dark', otherwise black text on white.
        """
        assert self.load().isloaded(), 'Image not loaded'
        (facecolor, textcolor) = ('black', 'white') if theme=='dark' else ('white', 'black')
        im = self.clone()
        if mutator:
            im = mutator(im)
        vipy.show.imshow(im.rgb().numpy(), fignum=figure, nowindow=nowindow, timestamp=timestamp, timestampfacecolor=facecolor, flush=True, timestampcolor=textcolor)
        return self

    def save(self, filename=None, quality=75):
        """Save the current image to a new filename (a temporary JPEG if not provided) and return the image object.

        The image filename is set to the saved file, the loader is reset to None and the array is flushed.
        """
        target = filename if filename is not None else tempjpg()
        written = self.saveas(target, quality=quality)
        return self.filename(written).loader(None).flush_array()
        
        
    # Image export
    def pkl(self, pklfile=None):
        """Save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains.

        If pklfile is not provided, the image filename with the extension replaced by '.pkl' is used.
        """
        assert pklfile is not None or self.filename() is not None
        if pklfile is None:
            pklfile = toextension(self.filename(), '.pkl')
        remkdir(vipy.util.filepath(pklfile))
        vipy.util.save(self, pklfile)
        return self

    def pklif(self, b, pklfile=None):
        """Save the object to the provided pickle file only if b=True. Useful for conditional intermediate saving in long fluent chains"""
        assert isinstance(b, bool)
        return self.pkl(pklfile) if b else self

    def saveas(self, filename=None, writeas=None, quality=75):
        """Save current buffer (not including drawing overlays) to new filename and return filename.  If filename is not provided, use a temporary JPEG filename.

        Greyscale images (colorspace 'grey', also accepted as 'gray') are written with imwritegray, all other
        non-float colorspaces are written with imwrite.

        Raises:
            ValueError: if the colorspace is 'float', which must be converted before saving
        """
        filename = tempjpg() if filename is None else filename
        if self.colorspace() in ['gray', 'grey']:  # the colorspace is standardized to 'grey' internally
            imwritegray(self.grayscale()._array, filename, quality=quality)
        elif self.colorspace() != 'float':
            imwrite(self.load().array(), filename, writeas=writeas, quality=quality)
        else:
            raise ValueError('Convert float image to RGB or gray first. Try self.mat2gray()')
        return filename

    def saveastmp(self):
        """Save current buffer to a temp JPEG filename (from tempjpg()) with saveas() and return the filename"""
        return self.saveas(tempjpg())

    def savetmp(self):
        """Save current buffer to temp JPEG filename and return filename.  Alias for saveastmp()"""
        return self.saveastmp()

    def tocache(self):
        """Write the current buffer to a temporary JPEG filename mapped through vipy.util.tocache() and return that filename."""
        cached = vipy.util.tocache(tempjpg())
        return self.saveas(cached)
    
    def base64(self):
        """Return the base64 encoding (bytes) of a JPEG rendering of this image, suitable for embedding in an html page.

        The image is cloned and converted to RGB before encoding, so this object is not modified here.
        """
        jpeg = io.BytesIO()
        rgb_copy = self.clone().rgb()
        rgb_copy.pil().save(jpeg, format='JPEG')
        return base64.b64encode(jpeg.getvalue())
        
    def ascii(self):
        """Return the base64 encoding of the image decoded to an ascii string, suitable for embedding in an <img> tag."""
        encoded = self.base64()
        return encoded.decode('ascii')

    def html(self, alt=None, id=None, attributes={'loading':'lazy'}):
        """Export a base64 encoding of the image suitable for embedding in an html page, enclosed in <img> tag

        Args:
            alt: Alternate text for the tag.  Defaults to self.filename() when None.
            id: Value of the id attribute.  Defaults to self.filename() when None; the id attribute is
                omitted if that is also None.
            attributes: Dictionary of extra attribute name/value pairs appended to the tag.  It is only read, never modified.

        Returns:
           -string:  <img src="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" loading="lazy"> containing base64 encoded JPEG and alt text with lazy loading

        .. note:: The id, alt text and all attribute names/values are HTML-escaped, so characters such as
           quotes or angle brackets (e.g. in a filename) cannot break out of the attribute.
        """
        from html import escape  # local import so the file-level import block is left untouched

        assert isinstance(attributes, dict)
        b = self.base64().decode('ascii')  # base64 alphabet is already attribute-safe
        alt_text = alt if alt is not None else self.filename()
        id = id if id is not None else self.filename()
        attr = ' '.join('%s="%s"' % (escape(str(k)), escape(str(v))) for (k, v) in attributes.items())
        id_attr = ('id="%s" ' % escape(str(id))) if id is not None else ''
        return '<img %ssrc="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" %s>' % (id_attr, b, escape(str(alt_text)), attr)

    def annotate(self, timestamp=None, mutator=None, theme='dark'):
        """Render the annotations into pixels and return the rendering as a new RGB image object.

        The rendering is produced by `savefig` (with the given timestamp, theme and mutator), converted to RGB,
        and wrapped in a new vipy.image.Image built from that array.
        """
        # FIXME: for k in range(0,10): self.annotate().show(figure=k), this will result in cumulative figures
        rendered = self.savefig(timestamp=timestamp, theme=theme, mutator=mutator)
        pixels = rendered.rgb().array()
        return vipy.image.Image(array=pixels, colorspace='rgb')

    def savefig(self, filename=None, figure=1, timestamp=None, theme='dark', mutator=None):
        """Render self.show() with drawing overlays into an image, optionally saving it to a file.

        Args:
            filename: Optional output path.  When provided, the RGB rendering is also written there
                (the path is user-expanded and made absolute).
            figure: Figure number used for rendering.  This figure is closed before returning.
            timestamp, theme, mutator: Forwarded to self.show().

        Returns:
            A vipy.image.Image in 'rgba' colorspace holding the rendered figure.
            Note that the image is returned, not the filename.
        """
        self.show(figure=figure, nowindow=True, timestamp=timestamp, theme=theme, mutator=mutator)  # sets figure dimensions, does not display window
        canvas = plt.figure(figure).canvas  # read back the figure that was just drawn, not a hard-coded figure 1
        (W,H) = canvas.get_width_height()  # fast
        buf = io.BytesIO()
        canvas.print_raw(buf)  # fast
        img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))  # RGBA
        vipy.show.close(figure)
        t = vipy.image.Image(array=img, colorspace='rgba')
        if filename is not None:
            t.rgb().saveas(os.path.abspath(os.path.expanduser(filename)))
        return t

    def map(self, func):
        """Replace the pixel buffer with func(buffer) and return this object.

        Args:
            func: A lambda taking the current numpy array and returning a numpy array
                (e.g. f = lambda img: 255.0-img).

        Returns:
            This object.  If the dtype or the shape of the result differs from the input array,
            the colorspace is set to 'float'.
        """
        assert isinstance(func, types.LambdaType), "Input must be lambda function (e.g. f = lambda img: 255.0-img)"
        before = self.array()  # reference to the buffer prior to the transformation
        after = func(self.array())
        assert isnumpy(after), "Lambda function output must be numpy array"
        self.array(after)  # reference
        same_layout = (after.dtype == before.dtype) and (after.shape == before.shape)
        if not same_layout:
            self.colorspace('float')  # unknown colorspace after transformation, set generic
        return self

    def perceptualhash(self, bits=128, asbinary=False, asbytes=False):
        """Perceptual differential hash function

        A clone of this image is resized to a small square, converted to greyscale, and the signs of the
        gradients along both image axes are packed into the hash.

        Args:
            bits: [int]  Hash length, must be 2*k*k for k in [2,16].  Longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
            asbinary: [bool] If true, return a binary array
            asbytes: [bool] if true return a byte array (takes precedence over asbinary)

        Returns:
            A hex string encoding the perceptual hash such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
            asbytes: a bytes array
            asbinary: a numpy binary array

        .. notes::
            - Can be used for near duplicate detection by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.  Equivalently, `vipy.image.Image.perceptualhash_distance`.
            - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex(h))
        """
        allowablebits = [2*k*k for k in range(2, 17)]
        assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
        side = int(np.ceil(np.sqrt(bits/2.0)))

        # one extra row/column is resized in, then dropped after the gradient is taken
        small = self.clone().resize(cols=side+1, rows=side+1).greyscale().numpy()
        gradients = np.dstack(np.gradient(small))
        signs = (gradients[0:-1, 0:-1] > 0).flatten()

        if asbytes:
            return bytes(np.packbits(signs))
        if asbinary:
            return signs
        return bytes(np.packbits(signs)).hex()

    @staticmethod
    def perceptualhash_distance(h1, h2):
        """Hamming distance between two perceptual hashes"""
        assert len(h1) == len(h2)
        return np.sum(np.unpackbits(bytearray().fromhex(h1)) != np.unpackbits(bytearray().fromhex(h2)))
    

    def rot90cw(self):
        """Rotate the pixel buffer 90 degrees clockwise (three quarter-turns of np.rot90) and return this object."""
        rotated = np.rot90(self.numpy(), k=3)
        self.array(rotated)
        return self

    def rot90ccw(self):
        """Rotate the pixel buffer 90 degrees counterclockwise (one quarter-turn of np.rot90) and return this object."""
        rotated = np.rot90(self.numpy(), k=1)
        self.array(rotated)
        return self

    def face_detection(self, mindim=256,  conf=0.2):
        """Run the heyvi face detector on a cleared, downsampled clone of this image and return the detections as a scene.

        Args:
            mindim [int]: The minimum dimension for downsampling the image for face detection.  Will be upsampled back to native resolution prior to return
            conf [float]: Accepted for signature symmetry with `person_detection`; it is not used in this method body.

        Returns
            A `vipy.image.Scene` object with all detected faces or the union of faces and all objects in self

        .. note:: This method uses a CPU-only pretrained face detector.  This is convenient, but slow.  See the heyvi package for optimized GPU batch processing for faster operation.
        """
        try_import('heyvi')
        import heyvi
        assert heyvi.version.is_at_least('0.3.28')

        detector = heyvi.detection.FaceDetector()
        scene = Scene.cast(self.clone()).clear().mindim(mindim)  # detect on a clone so this object is untouched
        return detector(scene).flush()
    
    def person_detection(self, mindim=256, conf=0.2):
        """Run the heyvi object detector restricted to the 'person' class on a cleared, downsampled clone and return the detections as a scene.

        Args:
            mindim [int]: The minimum dimension for downsampling the image for person detection.  Will be upsampled back to native resolution prior to return
            conf [float]: A real value between [0,1] of the minimum confidence for person detection

        Returns
            A `vipy.image.Scene` object with all detected people or the union of people and all objects in self

        .. note:: This method uses a CPU-only pretrained person detector.  This is convenient, but slow.  See the heyvi package for optimized GPU batch processing for faster operation.
        """
        try_import('heyvi')
        import heyvi
        assert heyvi.version.is_at_least('0.3.28')

        detector = heyvi.detection.ObjectDetector()
        scene = Scene.cast(self.clone()).clear().mindim(mindim)  # detect on a clone so this object is untouched
        return detector(scene, conf=conf, objects=['person']).flush()

    def face_blur(self, radius=4, mindim=256):
        """Detect faces, record their boxes in the 'face_blur' attribute, blur them with blurmask() and downcast the result.

        Args:
            radius [int]: The radius of pixels for `vipy.image.Scene.blurmask`
            mindim [int]: The minimum dimension for downsampling the image for face detection.

        Returns:
            The downcast result of the face scene after blurmask(radius), with the 'face_blur' attribute set
            to the list of integer-valued, JSON-style (encode=False) boxes of the detected faces.

        .. notes::
            - This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.
            - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.
            - To retain boxes, use self.face_detection().blurmask()
        """
        faces = self.face_detection(mindim=mindim)  # only faces
        boxes = [face.int().json(encode=False) for face in faces.objects()]
        return faces.setattribute('face_blur', boxes).blurmask(radius=radius).downcast()

    def face_pixelize(self, radius=7, mindim=256):
        """Detect faces, record their boxes in the 'face_pixelize' attribute, pixelize them and downcast the result.

        Args:
            radius [int]: The radius of pixels for `vipy.image.Scene.pixelize`
            mindim [int]: The minimum dimension for downsampling the image for face detection.

        Returns:
            The downcast result of the face scene after pixelize(radius), with the 'face_pixelize' attribute set
            to the list of integer-valued, JSON-style (encode=False) boxes of the detected faces.

        .. notes::
            - This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.
            - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.
            - To retain boxes, use self.face_detection().pixelize()
        """
        faces = self.face_detection(mindim=mindim)
        boxes = [face.int().json(encode=False) for face in faces.objects()]
        return faces.setattribute('face_pixelize', boxes).pixelize(radius=radius).downcast()


    def viewport(self):
        """Return the bounding box of the current loaded pixels in the original filename/url/buffer.

        Starting from self.imagebox(), every (method name, keyword arguments) entry of self._history()
        is replayed on the box in reverse order, undoing the chain of geometric transformations
        that produced array().

        This is useful to specify a region of a larger image that was zoomed in for processing.

        To show this viewport as a bounding box:

        >>> im = vipy.image.vehicles().centercrop(100,100)
        >>> viewport = vipy.object.Detection.cast(im.viewport())
        >>> im.flush().append(viewport).show()
        """
        box = self.imagebox()
        if self._history() is None:
            return box  # nothing recorded, the image box is the viewport
        for (method, kwargs) in reversed(self._history()):
            getattr(box, method)(**kwargs)  # return value is ignored; the box is expected to update in place
        return box

    def padcrop(self, bbox):
        """Crop to the supplied bounding box, zero padding first when the box extends outside the image rectangle.

        The padding in each direction is the (integer truncated) largest overhang of the box beyond the image,
        and the box is translated by the same amount so it addresses the same pixels in the padded image.
        """
        overhang_x = max(0 - bbox.xmin(), bbox.xmax() - self.width())
        dx = int(max(0, overhang_x))
        overhang_y = max(0 - bbox.ymin(), bbox.ymax() - self.height())
        dy = int(max(0, overhang_y))

        padded = self.zeropad(dx, dy)
        shifted = bbox.translate(dx=dx, dy=dy)
        return padded._crop(shifted)
    
    def recenter(self, p):
        """Recenter the image so that point p=(x=col, y=row) in the current image is in the middle of the new image, zero padding as needed.

        The image box is re-centered on p via centroid(p) and the result is passed to `padcrop`.
        This is useful to implement a 'saccade', under the small angle assumption, where a rotation is approximated by a translation
        """
        target = self.imagebox().centroid(p)
        return self.padcrop(target)

Methods

def abspath(self)

Change the path of the filename from a relative path to an absolute path (not relocatable)

Expand source code Browse git

def abspath(self):
    """Replace the filename with its user-expanded, absolute, normalized path (not relocatable) and return the result of filename(<path>)."""
    expanded = os.path.expanduser(self.filename())
    absolute = os.path.normpath(os.path.abspath(expanded))
    return self.filename(absolute)

def additive_noise(self, hue=(-15, 15), saturation=(-15, 15), brightness=(-15, 15))

Apply uniform random additive noise in the given range to the given HSV color channels. Image will be converted to HSV prior to applying noise.

Expand source code Browse git

def additive_noise(self, hue=(-15,15), saturation=(-15,15), brightness=(-15,15)):
    """Add uniform random noise to the HSV channels of the image.

    Each argument is a (low, high) tuple with high >= low giving the range of the additive noise for that
    channel.  The image is converted with hsv(), the noise is added, and the result is clipped to [0, 255]
    and stored back as uint8.
    """
    for channel_range in (hue, saturation, brightness):
        assert isinstance(channel_range, tuple) and len(channel_range) == 2 and channel_range[1]>=channel_range[0]

    (rows, cols, _) = (self.height(), self.width(), self.channels())
    # one noise plane per channel, drawn in hue, saturation, brightness order
    planes = [(high - low) * np.random.rand(rows, cols) + low for (low, high) in (hue, saturation, brightness)]
    noisy = self.hsv().array() + np.dstack(planes)
    return self.array(np.clip(noisy, 0, 255).astype(np.uint8))

def affine_transform(self, A, border='zero')

Apply a 3x3 affine geometric transformation to the image.

Args:
- A [np.ndarray]: 3x3 affine geometric transform from affine_transform()
- border [str]: 'zero' or 'replicate' to handle elements outside the image rectangle after transformation

Returns

This object with only the array transformed

Note: The image will be loaded and converted to float() prior to applying the affine transformation.

Note: This will transform only the pixels, not objects

Expand source code Browse git

def affine_transform(self, A, border='zero'):
    """Apply a 3x3 affine geometric transformation to the image. 

    Args:        
        - A [np.ndarray]: 3x3 affine geometric transform from `vipy.geometry.affine_transform`
        - border [str]:  'zero' or 'replicate' to handle elements outside the image rectangle after transformation

    Returns:
        - This object with only the array transformed

    Raises:
        AssertionError: if A is not a numpy array or is not of shape (3,3)

    .. note:: The image will be loaded and converted to float() prior to applying the affine transformation.  
    .. note:: This will transform only the pixels, not objects
    """
    # The transform must be an array; the previous check referenced an undefined name and raised NameError instead
    assert isnumpy(A), "invalid input"
    assert A.shape == (3,3), "The affine transformation matrix should be the output of vipy.geometry.affine_transformation"
    self._array = vipy.geometry.imtransform(self.load().float().array(), A.astype(np.float32), border=border)
    return self

def alpha(self)

Return alpha (transparency) channel as a cloned single channel Image object

Expand source code Browse git

def alpha(self):
    """Return the alpha (transparency) channel, i.e. channel(3), of a four channel 'rgba' or 'bgra' image."""
    has_four_channels = self.channels() == 4
    assert has_four_channels and self.colorspace() in ['rgba', 'bgra'], "Must be four channnel color image"
    return self.channel(3)

def alphapad(self, padwidth, padheight)

Pad image using alpha transparency by adding padwidth on both left and right, or padwidth=(left,right) for different pre/post padding, and padheight on top and bottom, or padheight=(top,bottom) for different pre/post padding

Expand source code Browse git

def alphapad(self, padwidth, padheight):
    """Pad an RGBA image with fully transparent black pixels.

    padwidth and padheight are forwarded unchanged to meanpad(), with the pad value fixed to [0,0,0,0].
    Per the original documentation each may be a single value (same padding on both sides) or a
    (pre, post) pair for different padding on each side.
    """
    assert self.colorspace() == 'rgba', "Colorspace must be RGBA for padding with transparency"
    transparent = np.array([0,0,0,0])
    return self.meanpad(padwidth, padheight, mu=transparent)

def annotate(self, timestamp=None, mutator=None, theme='dark')

Change pixels of this image to include rendered annotation and return an image object

Expand source code Browse git

def annotate(self, timestamp=None, mutator=None, theme='dark'):
    """Render the annotations into pixels and return the rendering as a new RGB image object.

    The rendering is produced by `savefig` (with the given timestamp, theme and mutator), converted to RGB,
    and wrapped in a new vipy.image.Image built from that array.
    """
    # FIXME: for k in range(0,10): self.annotate().show(figure=k), this will result in cumulative figures
    rendered = self.savefig(timestamp=timestamp, theme=theme, mutator=mutator)
    pixels = rendered.rgb().array()
    return vipy.image.Image(array=pixels, colorspace='rgb')

def append_attribute(self, key, value)

Append the value to attribute key, creating the key as an empty list if it does not exist

Expand source code Browse git

def append_attribute(self, key, value):
    """Append value to the list stored under attribute key, starting from an empty list if the key is missing, and return this object."""
    self.attributes.setdefault(key, []).append(value)
    return self

def area(self)

Return the area of the image as (width * height).

Returns

An integer equivalent to (Image.width() * Image.height())

Note: This triggers a Image.load() if the image is not already loaded.

Expand source code Browse git

def area(self):
    """Return the area of the image in pixels.

    Returns:
        The product of `vipy.image.Image.width` and `vipy.image.Image.height`

    .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
    """
    columns = self.width()
    rows = self.height()
    return columns * rows

def array(self, np_array=None, copy=False)

Replace self._array with provided numpy array

Args

np_array: [numpy array] A new array to use as the pixel buffer for this image.
copy: [bool] If true, copy the buffer using np.copy(), else use a reference to this buffer.

Returns

If np_array is not None, return the Image object such that this object points to the provided numpy array as the pixel buffer
If np_array is None, then return the numpy array.

Notes

If copy=False, then this Image object will share the pixel buffer with the owner of np_array. Changes to pixels in this buffer will be shared.
If copy=True, then this will significantly slow down processing for large images. Use references wherever possible.

Expand source code Browse git

def array(self, np_array=None, copy=False):
    """Get the pixel buffer, or replace it with the provided numpy array.

    Args:
        np_array: [numpy array] A new array to use as the pixel buffer for this image.  When None, this method is a getter.
        copy: [bool] If true, copy the buffer using np.copy(), else use a reference to this buffer.

    Returns:
        - If np_array is not None, return the `vipy.image.Image` object such that this object points to the provided numpy array as the pixel buffer
        - If np_array is None, then return the numpy array (a copy of it when copy is not False).

    Raises:
        ValueError: if np_array is provided but is not a numpy array
        AssertionError: if the new buffer is not of dtype uint8 or float32 (checked after the buffer has been assigned)

    .. notes:: 
        - If copy=False, then this `vipy.image.Image` object will share the pixel buffer with the owner of np_array.  Changes to pixels in this buffer will be shared.  
        - If copy=True, then this will significantly slow down processing for large images.  Use references wherever possible.
    """
    if np_array is None:
        # getter
        if copy is False:
            return self._array
        return np.copy(self._array)

    if not isnumpyarray(np_array):
        raise ValueError('Invalid input - array() must be numpy array and not "%s"' % (str(type(np_array))))

    # setter
    self._array = np.copy(np_array) if copy else np_array  # reference or copy
    assert self._array.dtype == np.float32 or self._array.dtype == np.uint8, "Invalid input - array() must be type uint8 or float32 and not type='%s'" % (str(self._array.dtype))                        
    self.colorspace(None)  # must be set with colorspace() after array() but before _to_colorspace()
    return self

def ascii(self)

Export a base64 ascii encoding of the image suitable for embedding in an <img> tag

Expand source code Browse git

def ascii(self):
    """Return the base64 encoding of the image decoded to an ascii string, suitable for embedding in an <img> tag."""
    encoded = self.base64()
    return encoded.decode('ascii')

def aspectratio(self)

Return the aspect ratio of the image as (width/height) ratio.

Returns

A float equivalent to (Image.width() / Image.height())

Note: This triggers a Image.load() if the image is not already loaded.

Expand source code Browse git

def aspectratio(self):
    """Return the aspect ratio of the image, width divided by height.

    Returns:
        A float equivalent to (`vipy.image.Image.width` / `vipy.image.Image.height`)

    .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
    """
    columns = self.load().width()
    rows = float(self.height())
    return columns / rows

def base64(self)

Export a base64 encoding of the image suitable for embedding in an html page

Expand source code Browse git

def base64(self):
    """Return the base64 encoding (bytes) of a JPEG rendering of this image, suitable for embedding in an html page.

    The image is cloned and converted to RGB before encoding, so this object is not modified here.
    """
    jpeg = io.BytesIO()
    rgb_copy = self.clone().rgb()
    rgb_copy.pil().save(jpeg, format='JPEG')
    return base64.b64encode(jpeg.getvalue())

def bgr(self)

Convert the image buffer to three channel BGR uint8 colorspace

Expand source code Browse git

def bgr(self):
    """Convert the image buffer to the 'bgr' colorspace (three channel BGR uint8) via _to_colorspace()."""
    target = 'bgr'
    return self._to_colorspace(target)

def bgra(self)

Convert the image buffer to four channel BGRA uint8 colorspace

Expand source code Browse git

def bgra(self):
    """Convert the image buffer to the 'bgra' colorspace (four channel BGRA uint8) via _to_colorspace()."""
    target = 'bgra'
    return self._to_colorspace(target)

def bias(self, b)

Add a bias to the image array. Bias should be broadcastable to array(). This forces the colorspace to 'float'

Expand source code Browse git

def bias(self, b):
    """Add bias b to the float-converted image array and force the colorspace to 'float'.

    b should be broadcastable to array().  Returns the result of colorspace('float').
    """
    shifted = self.load().float().array() + b
    self.array(shifted)
    return self.colorspace('float')

def blend(self, im, alpha)

Alpha blend self and im in-place, such that self = alpha*self + (1-alpha)*im

Expand source code Browse git

def blend(self, im, alpha):
    """Alpha blend self and im in-place, such that self = alpha*self + (1-alpha)*im.

    Args:
        im: An Image.  A clone of it is loaded, converted to this image's colorspace and resized like this image before blending.
        alpha: Blend weight in [0, 1] applied to this image.

    Returns:
        The result of map() on this (loaded) image, with the blended pixels cast to uint8.
    """
    assert isinstance(im, Image)
    assert 0 <= alpha <= 1
    assert self.colorspace() not in ['float','rgba','bgra'], "convert to rgb first"

    def _matched():
        # evaluated lazily, when map() applies the lambda to the loaded buffer
        return im.clone().load()._to_colorspace(self.colorspace()).resize_like(self).array()

    return self.load().map(lambda arr: np.uint8(alpha * arr + (1-alpha)*_matched()))

def blue(self)

Return blue channel as a cloned single channel Image object.

These are equivalent operations if the colorspace is 'rgb' or 'rgba':

self.blue() == self.channel(2)

These are equivalent operations if the colorspace is 'bgr' or 'bgra':

self.blue() == self.channel(0)

Note: OpenCV returns images in BGR colorspace. Use this method to always return the desired channel by color.

Expand source code Browse git

def blue(self):
    """Return blue channel as a cloned single channel `vipy.image.Image` object.

    These are equivalent operations if the colorspace is 'rgb' or 'rgba':

    ```python
    self.blue() == self.channel(2) 
    ```

    These are equivalent operations if the colorspace is 'bgr' or 'bgra':

    ```python
    self.blue() == self.channel(0) 
    ```

    Raises:
        ValueError: if the colorspace is not one of 'rgb', 'rgba', 'bgr', 'bgra'

    .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
    """
    assert self.channels() >= 3, "Must be three channel color image"
    if self.colorspace() in ['rgb', 'rgba']:
        return self.channel(2)
    elif self.colorspace() in ['bgr', 'bgra']:
        return self.channel(0)
    else:
        # the message names the channel actually requested here (it previously said "red")
        raise ValueError('Invalid colorspace "%s" does not contain blue channel' % self.colorspace())

def blur(self, sigma=3)

Apply a Gaussian blur with Gaussian kernel radius=sigma to the pixel buffer.

Args

sigma: [float >=0] The gaussian blur kernel radius.

Returns

This Image object with the pixel buffer blurred in place.

Expand source code Browse git

def blur(self, sigma=3):
    """Apply a Gaussian blur with Gaussian kernel radius=sigma to the pixel buffer.

    Args:
        sigma: [float >=0] The gaussian blur kernel radius.  A value of zero leaves the image untouched.

    Returns:
        This `vipy.image.Image` object with the pixel buffer blurred in place.
    """
    assert sigma >= 0
    if not sigma > 0:
        return self
    pil_image = self.pil()
    blurred = pil_image.filter(PIL.ImageFilter.GaussianBlur(radius=sigma))
    return self.array(np.array(blurred))

def bone(self)

Apply bone colormap to greyscale image and convert to RGB

Expand source code Browse git

def bone(self):
    """Apply the 'bone' colormap via _apply_colormap() (greyscale image rendered as RGB, per the original documentation)."""
    colormap = 'bone'
    return self._apply_colormap(colormap)

def border_mask(self, pad)

Return a binary uint8 image the same size as self, with a border of pad pixels in width or height around the edge

Expand source code Browse git

def border_mask(self, pad):
    """Return a binary uint8 image the same size as self, with a border of pad pixels in width or height around the edge

    Args:
        pad: [int] Border thickness in pixels.  A non-positive pad yields an all-zero mask (no border).

    Returns:
        A (height x width) uint8 numpy array that is one inside the border and zero elsewhere.
    """
    img = np.zeros( (self.height(), self.width()), dtype=np.uint8)
    if pad > 0:
        # guard needed because img[-0:, :] would select the entire array when pad == 0
        img[0:pad,:] = 1
        img[-pad:,:] = 1
        img[:,0:pad] = 1
        img[:,-pad:] = 1
    return img

def canload(self)

Return True if the image can be loaded successfully, useful for filtering bad links or corrupt images

Expand source code Browse git

def canload(self):
    """Return True if the image can be loaded successfully, useful for filtering bad links or corrupt images

    An already loaded image returns True immediately.  Otherwise an existing local image file is checked
    with PIL verify(), and anything else is checked by loading and then flushing the image.

    Returns:
        True if the check succeeded, False if it raised any Exception.
        KeyboardInterrupt and SystemExit are not swallowed.
    """
    if self.isloaded():
        return True
    try:
        if isimagefile(self._filename) and os.path.exists(self._filename):
            # faster, throws exception on corrupted image; the context manager closes the file handle
            with PIL.Image.open(self._filename) as img:
                img.verify()
        else:
            self.load().flush()  # fallback, load it and flush to avoid memory leak (expensive)
    except Exception:
        return False
    return True

def centercrop(self, height, width)

Crop image of size (height x width) in the center, keeping the image centroid constant

Expand source code Browse git

def centercrop(self, height, width):
    """Crop a (height x width) region around the image center, keeping the image centroid constant.

    Note the argument order: height first, then width.  Both are truncated to int.
    """
    box = BoundingBox(xcentroid=float(self.width() / 2.0),
                      ycentroid=float(self.height() / 2.0),
                      width=int(width),
                      height=int(height))
    return self._crop(box)

def centerpixel(self)

Return the integer valued center pixel coordinates of the image (col=i,row=j)

The centerpixel is equivalent to half the Image.shape() rounded to the nearest integer pixel coordinate.

Returns

A tuple (int(column), int(row)) of the integer center of the image.

Expand source code Browse git

def centerpixel(self):
    """Return the integer valued center pixel coordinates of the image (col=i,row=j)

    The centerpixel is the `vipy.image.Image.centroid` rounded with np.round() to integer pixel coordinates.

    Returns:
        A tuple (int(column), int(row)) of the integer center of the image.
    """
    rounded = np.round(self.centroid())
    return tuple(int(rounded[axis]) for axis in (0, 1))

def centersquare(self)

Crop image of size (NxN) in the center, such that N=min(width,height), keeping the image centroid constant

Expand source code Browse git

def centersquare(self):
    """Crop the largest centered square: an (NxN) region with N equal to the smallest entry of shape(), keeping the image centroid constant."""
    side = int(np.min(self.shape()))
    box = BoundingBox(xcentroid=float(self.width() / 2.0),
                      ycentroid=float(self.height() / 2.0),
                      width=side,
                      height=side)
    return self._crop(box)

def centroid(self)

Return the real valued center pixel coordinates of the image (col=x,row=y).

The centroid is equivalent to half the Image.shape().

Returns

A tuple (column, row) of the floating point center of the image.

Expand source code Browse git

def centroid(self):
    """Return the real valued center pixel coordinates of the image (col=x,row=y).

    The image is loaded first; the centroid is half the width and half the height.

    Returns:
        A tuple (column, row) of the floating point center of the image.
    """
    half_width = self.load().width() / 2.0
    half_height = self.height() / 2.0
    return (half_width, half_height)

def channel(self, k=None)

Return a cloned Image() object for the kth channel, or return an iterator over channels if k=None.

Iterate over channels as single channel luminance images:

for c in self.channel():
    print(c)

Return the kth channel as a single channel luminance image:

c = self.channel(k=0)

Expand source code Browse git

def channel(self, k=None):
    """Return the kth channel as a single channel luminance image, or a list of all channels if k=None.

    Iterate over channels as single channel luminance images:

    ```python
    for c in self.channel():
        print(c)
    ```

    Return the kth channel as a single channel luminance image:

    ```python
    c = self.channel(k=0)
    ```

    .. note:: The returned image is a loaded clone with colorspace 'lum', except when k=0 is requested
       from an image that already has a single channel, in which case this object itself is returned.
    """
    if k is None:
        return [self.channel(index) for index in range(self.channels())]
    if k == 0 and self.channels() == 1:
        return self  # already a single channel image

    assert k < self.channels() and k>=0, "Requested channel=%d must be within valid channels=%d" % (k, self.channels())
    plane = self.clone().load()
    plane._array = plane._array[:, :, k]
    plane._colorspace = 'lum'
    return plane

def channelmean(self)

Return a cloned Image() object for the mean of all channels followed by returning a single channel float image.

This is useful for visualizing multichannel images by reducing the channels to one

vipy.image.Image(array=np.random.rand(3,3,16).astype(np.float32)).channelmean().mat2gray().lum().show()

Expand source code Browse git

def channelmean(self):
    """Return a loaded clone whose buffer is the mean over the channel axis, kept as a single trailing channel, with colorspace 'float'.

    This is useful for visualizing multichannel images by reducing the channels to one

    ```python
    vipy.image.Image(array=np.random.rand(3,3,16).astype(np.float32)).channelmean().mat2gray().lum().show()
    ```

    """
    averaged = self.clone().load()
    averaged._array = averaged._array.mean(axis=2, keepdims=True)
    averaged._colorspace = 'float'
    return averaged

def channels(self)

Return integer number of color channels

Expand source code Browse git

def channels(self):
    """Return integer number of color channels: 1 for a two dimensional buffer, otherwise the size of the third axis.  Loads the image first if needed."""
    if not self.isloaded():
        return self.load().channels()
    pixels = self._array
    return 1 if pixels.ndim == 2 else pixels.shape[2]

def clear_attributes(self)

Expand source code Browse git

def clear_attributes(self):
    """Replace the attributes dictionary with a new empty dictionary and return this object."""
    self.attributes = dict()
    return self

def clear_filename(self)

Remove the current filename from the object in-place and return the object

Expand source code Browse git

def clear_filename(self):
    """Reset the stored filename to None in-place and return this object."""
    setattr(self, '_filename', None)
    return self

def clone(self, flushforward=False, flushbackward=False, flush=False, shallow=False, attributes=False, dereference=False)

Create deep copy of object, flushing the original buffer if requested and returning the cloned object. Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned object which can be used for encoding and will be garbage collected.

* flushforward: copy the object, and set the cloned object array() to None.  This flushes the video buffer for the clone, not the object
* flushbackward:  copy the object, and set the object array() to None.  This flushes the video buffer for the object, not the clone.
* flush:  set the object array() to None and clone the object.  This flushes the video buffer for both the clone and the object.
* dereference: remove both the filename and URL (if present) in the cloned object, leaving only the buffer

Expand source code Browse git

def clone(self, flushforward=False, flushbackward=False, flush=False, shallow=False, attributes=False, dereference=False):
    """Create a copy of this object, optionally flushing pixel buffers, and return the copy.

    Flushing is useful for distributed memory management to free the buffer from this object, and pass along a cloned
    object which can be used for encoding and will be garbage collected.

    Args:
        flushforward: [bool] copy the object, and set the cloned object array() to None.  This flushes the buffer for the clone, not the object
        flushbackward: [bool] copy the object, and set the object array() to None.  This flushes the buffer for the object, not the clone.
        flush: [bool] set the object array() to None and clone the object.  This flushes the buffer for both the clone and the object.  Equivalent to flushforward and flushbackward together.
        shallow: [bool] make a shallow copy (copy.copy) whose pixel buffer shares memory with this object where numpy allows it.  Only consulted when no flush option is set.
        attributes: [bool] give the clone its own deep copy of the attributes dictionary.  Deep clones already duplicate it, so this matters for shallow clones.
        dereference: [bool] remove both the filename and URL (if present) in the cloned object, leaving only the buffer

    Returns:
        The cloned object.

    Raises:
        AssertionError: if dereference is requested and the clone has no pixel buffer.
    """
    if flush or (flushforward and flushbackward):
        self.flush()  # flushes buffer on object and clone
        im = copy.deepcopy(self)  # object and clone are flushed
    elif flushbackward:
        im = copy.deepcopy(self)  # propagates _array to clone
        self.flush()  # object flushed, clone not flushed
    elif flushforward:
        array = self._array
        self._array = None  # temporarily detach the buffer so that deepcopy does not duplicate the pixels
        try:
            im = copy.deepcopy(self)   # does not propagate _array to clone
        finally:
            self._array = array    # object not flushed; restored even if deepcopy raises
        im.flush()
    elif shallow:
        im = copy.copy(self)  # shallow copy
        im._array = np.asarray(self._array) if self._array is not None else None  # shared pixels
    else:
        im = copy.deepcopy(self)
    if attributes:
        im.attributes = copy.deepcopy(self.attributes)
    if dereference:
        assert im._array is not None, "image buffer required"
        im._filename = None
        im._url = None
    return im

def close(self, fignum=None)

Close the requested figure number, or close all figures if fignum=None

Expand source code Browse git

def close(self, fignum=None):
    """Close the figure with the requested number, or every open figure when fignum is None.

    Returns this object when a figure number is given, otherwise whatever closeall() returns.
    """
    if fignum is not None:
        vipy.show.close(fignum)
        return self
    return self.closeall()

def closeall(self)

Close all open figure windows

Expand source code Browse git

def closeall(self):
    """Close every open figure window via vipy.show.closeall() and return this object."""
    vipy.show.closeall()
    return self

def color_transform(self, colorspace)

Transform the image buffer from the current Image.colorspace() to the provided colorspace

Expand source code Browse git

def color_transform(self, colorspace):
    """Convert the image buffer from the current `vipy.image.Image.colorspace` to the requested colorspace.

    Thin public wrapper that delegates to self._to_colorspace(colorspace) and returns its result.
    """
    converted = self._to_colorspace(colorspace)
    return converted

def colorspace(self, colorspace=None)

Return or set the colorspace as ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']

Expand source code Browse git

def colorspace(self, colorspace=None):
    """Return or set the colorspace as ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'lum']

    Args:
        colorspace: [str] The new colorspace label (case insensitive, 'gray' is accepted as a synonym for 'grey'), or None to query.

    Returns:
        The current colorspace string if colorspace is None, otherwise this object with the colorspace label set.
        Setting the label does not convert any pixels.  If the image is loaded, the label is validated against
        the buffer datatype, channel count and (for 'grey') value range.

    Raises:
        AssertionError: if the label is not allowed, or is inconsistent with the loaded buffer.
        ValueError: if the loaded buffer is neither np.float32 nor np.uint8, or has an unsupported channel count for a non-float label.
    """
    if colorspace is None:
        return self._colorspace

    assert str(colorspace).lower() in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s'. Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'float', 'grey', 'gray', 'lum']" % colorspace
    if self.isloaded():
        colorspace = str(colorspace).lower()

        # The label must agree with the buffer datatype
        if self.array().dtype == np.float32:
            assert colorspace in ['float', 'grey', 'gray'], "Invalid colorspace '%s' for float32 array()" % colorspace
        elif self.array().dtype == np.uint8:
            assert colorspace in ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum'], "Invalid colorspace '%s' for uint8 array(). Allowable is ['rgb', 'rgba', 'bgr', 'bgra', 'hsv', 'lum']" % colorspace
        else:
            # report the offending datatype (not the requested colorspace)
            raise ValueError('unsupported array() datatype "%s".  Allowable is [np.float32, np.uint8]' % str(self.array().dtype))  # should never get here as long as array() is used to set _array

        # The label must agree with the number of channels
        if self.channels() == 1:
            assert colorspace in ['float', 'grey', 'gray', 'lum'], "Invalid colorspace '%s' for single channel array.  Allowable is ['float', 'grey', 'gray', 'lum']" % colorspace
        elif self.channels() == 3:
            assert colorspace in ['float', 'rgb', 'bgr', 'hsv'], "Invalid colorspace '%s' for three channel array. Allowable is ['float', 'rgb', 'bgr', 'hsv']" % colorspace
        elif self.channels() == 4:
            assert colorspace in ['float', 'rgba', 'bgra'], "Invalid colorspace '%s' for four channel array. Allowable is ['float', 'rgba', 'bgra']" % colorspace
        elif colorspace != 'float':
            raise ValueError("Invalid colorspace '%s' for image channels=%d, type=%s" % (colorspace, self.channels(), str(self.array().dtype)))

        if colorspace in ['grey', 'gray']:
            assert self.max() <= 1 and self.min() >= 0, "Colorspace 'grey' image must be np.float32 in range [0,1].  Use colorspace 'lum' for np.uint8 in range [0,255], or colorspace 'float' for unconstrained np.float32 [-inf, +inf]"
            colorspace = 'grey'  # standardize
    self._colorspace = str(colorspace).lower()
    return self

def colorspace_like(self, im)

Convert the image buffer to have the same colorspace as the provided image

Expand source code Browse git

def colorspace_like(self, im):
    """Convert the image buffer so that its colorspace matches that of the provided image.

    Args:
        im: a `vipy.image.Image` whose colorspace() is used as the conversion target.

    Returns:
        The result of self._to_colorspace() for the colorspace of im.
    """
    assert isinstance(im, vipy.image.Image)
    target = im.colorspace()
    return self._to_colorspace(target)

def cornercrop(self, height, width)

Crop image of size (height x width) from the upper left corner

Expand source code Browse git

def cornercrop(self, height, width):
    """Crop a (height x width) region anchored at the upper left corner (0,0) of the image.

    The sizes are truncated to integers before the box is built; the crop itself is delegated to self._crop().
    """
    crop = self._crop
    corner = BoundingBox(xmin=0, ymin=0, width=int(width), height=int(height))
    return crop(corner)

def crop(self, bbox)

Expand source code Browse git

def crop(self, bbox):
    """Crop the image to the provided bounding box by delegating to self._crop(bbox), and return its result."""
    cropped = self._crop(bbox)
    return cropped

def del_attribute(self, k)

Expand source code Browse git

def del_attribute(self, k):
    """Remove key k from the attributes dictionary if it is present, and return this object.

    A missing key is silently ignored.
    """
    attrs = self.attributes
    if k not in attrs:
        return self
    attrs.pop(k)
    return self

def delattribute(self, k)

Expand source code Browse git

def delattribute(self, k):
    """Synonym for del_attribute: remove key k from the attributes and return the result of del_attribute."""
    result = self.del_attribute(k)
    return result

def delattributes(self, atts)

Expand source code Browse git

def delattributes(self, atts):
    """Remove every key in atts from the attributes and return this object.

    atts is passed through tolist() first, then delattribute() is called once per element in order.
    """
    keys = tolist(atts)
    for key in keys:
        self.delattribute(key)
    return self

def dict(self)

Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding

Expand source code Browse git

def dict(self):
    """Return a python dictionary containing the relevant serialized attributes suitable for JSON encoding.

    One entry is produced per name in Image.__slots__; leading underscores are stripped from the keys.
    """
    serialized = {}
    for slot in Image.__slots__:
        pretty = slot.lstrip('_')  # prettyjson (remove "_" prefix to attributes)
        serialized[pretty] = getattr(self, slot)
    return serialized

def download(self, timeout=10, verbose=False, cached=False)

Download URL to filename provided by constructor, or to temp filename.

Args

timeout: [int] The timeout in seconds for an http or https connection attempt. See also urllib.request.urlopen.
verbose: [bool] If true, output more helpful message.
cached: [bool] If true, use the cached previously downloaded file (if it exists)

Returns

This Image object with the URL downloaded to Image.filename() or to a tempimage() filename which can be retrieved with Image.filename().

Expand source code Browse git

def download(self, timeout=10, verbose=False, cached=False):
    """Download URL to filename provided by constructor, or to temp filename.

    Args:
        timeout: [int]  The timeout in seconds for an http or https connection attempt.  See also [urllib.request.urlopen](https://docs.python.org/3/library/urllib.request.html).
        verbose: [bool] If true, log an error message when the download fails.
        cached: [bool] If true, use the cached previously downloaded file (if it exists)

    Returns:
        This `vipy.image.Image` object with the URL downloaded to `vipy.image.Image.filename` or to a `vipy.util.tempimage` filename which can be retrieved with `vipy.image.Image.filename`.
        If there is no URL but there is a filename, this object is returned unchanged.

    Raises:
        ValueError: if there is neither a URL nor a filename, or the URL is rejected by `vipy.util.isurl`.
        NotImplementedError: for 's3' URLs and for any scheme other than http, https or file.
        Exception: any error raised while fetching is re-raised after the pixel buffer has been cleared.
    """
    if self._url is None and self._filename is not None:
        return self  # local file only, nothing to download
    if self._url is None or not isurl(str(self._url)):
        raise ValueError('[vipy.image.download][ERROR]: '
                         'Invalid URL "%s" ' % self._url)

    if self._filename is None:
        if vipy.globals.cache() is not None:
            # There is a potential race condition here when downloading files with common names like "main.jpg", add a (repeatable, hashed) 3 character subdir (<=4096 subdirs for ext3, max ~32K)
            self._filename = os.path.join(remkdir(vipy.globals.cache()), stringhash(self._url, 3), filetail(self._url.split('?')[0]))  # preserve image filename from url
            if not has_image_extension(self._filename):
                self._filename = self._filename + '.jpg'  # guess JPG for URLs with no file extension (e.g. php)
            # the hashed subdirectory is generated here, so make sure it exists before anything is written into it
            os.makedirs(os.path.dirname(self._filename), exist_ok=True)
        elif isimageurl(self._url):
            self._filename = tempimage(fileext(self._url))
        else:
            self._filename = tempjpg()  # guess JPG for URLs with no file extension

    if cached and self.hasfilename():
        return self

    try:
        url_scheme = urllib.parse.urlparse(self._url)[0]
        if url_scheme in ['http', 'https']:
            vipy.downloader.download(self._url,
                                     self._filename,
                                     verbose=verbose,
                                     progress=False,
                                     timeout=timeout,
                                     sha1=self.getattribute('url_sha1'),
                                     username=self.getattribute('url_username'),
                                     password=self.getattribute('url_password'))
        elif url_scheme == 'file':
            # copyfile needs a filesystem path, not the raw "file://..." URL string
            local_path = urllib.request.url2pathname(urllib.parse.urlparse(self._url).path)
            shutil.copyfile(local_path, self._filename)
        elif url_scheme == 's3':
            raise NotImplementedError('see vipy.downloader.s3()')
        else:
            raise NotImplementedError(
                'Invalid URL scheme "%s" for URL "%s"' %
                (url_scheme, self._url))

    except (httplib.BadStatusLine,
            urllib.error.URLError,
            urllib.error.HTTPError):
        if verbose:
            log.error('download failed for url "%s"' % self._url)
        self._array = None
        raise

    except IOError:
        if verbose:
            log.error('IO error downloading "%s" -> "%s" ' % (self.url(), self.filename()))
        self._array = None
        raise

    except Exception:
        # KeyboardInterrupt is not an Exception subclass, so it propagates untouched
        if verbose:
            log.error('load error for image "%s"' % self.filename())
        self._array = None
        raise

    return self

def downloadif(self, timeout=10, verbose=False)

Download URL to filename if the filename has not already been downloaded

Expand source code Browse git

def downloadif(self, timeout=10, verbose=False):
    """Download the URL to the filename unless a previously downloaded file can be reused.

    Objects without a URL are returned unchanged; otherwise this is download() with cached=True.
    """
    if not self.hasurl():
        return self
    return self.download(timeout=timeout, verbose=verbose, cached=True)

def exif(self, extended=False)

Return the EXIF meta-data in filename as a dictionary. Included non-base EXIF data if extended=True. Returns empty dictionary if no EXIF exists. Triggers download but not load.

Expand source code Browse git

def exif(self, extended=False):
    """Return the EXIF meta-data in filename as a dictionary.

    Triggers download but not load.

    Args:
        extended: [bool] If true, also merge the tags of every sub-IFD enumerated by PIL.ExifTags.IFD (GPS tags are
            resolved with PIL.ExifTags.GPSTAGS, all others with PIL.ExifTags.TAGS).  Tags without a known name are keyed by their numeric id.

    Returns:
        A dictionary mapping EXIF tag name to value.  Empty if there is no file or no EXIF exists.
    """
    d = {}
    if not self.download().hasfilename():
        return d

    # Do all of the reading while the file is open, then let the context manager release the handle
    with PIL.Image.open(self.filename()) as img:
        exif = img.getexif()
        if exif is None:
            return d

        d = {PIL.ExifTags.TAGS[k]: v for (k, v) in exif.items() if k in PIL.ExifTags.TAGS}

        if extended:
            for ifd_id in PIL.ExifTags.IFD:
                try:
                    ifd = exif.get_ifd(ifd_id)
                except KeyError:
                    continue  # this IFD is not available for this image
                resolve = PIL.ExifTags.GPSTAGS if ifd_id == PIL.ExifTags.IFD.GPSInfo else PIL.ExifTags.TAGS
                # merge the tags of every IFD, including GPS
                for k, v in ifd.items():
                    d[resolve.get(k, k)] = v
    return d

def face_blur(self, radius=4, mindim=256)

Replace pixels for all detected faces with Scene.blurmask(), add locations of detected faces into attributes.

Args

radius [int]: The radius of pixels for Scene.blurmask() mindim [int]: The minimum dimension for downsampling the image for face detection. Will be upsampled prior to pixelize.

Returns

A Image object with a pixel buffer with all faces pixelized, with faceblur attribute set in Image.metadata() showing the locations of the blurred faces.

Notes

This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.
For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.
To retain boxes, use self.face_detection().blurmask()

Expand source code Browse git

def face_blur(self, radius=4, mindim=256):
    """Blur the pixels of all detected faces with `vipy.image.Scene.blurmask`, and record the locations of the detected faces in the attributes.

    Args:
        radius [int]: The radius of pixels for `vipy.image.Scene.blurmask`
        mindim [int]: The minimum dimension for downsampling the image for face detection, passed to face_detection().

    Returns:
        The downcast() of the blurred detection result, with the 'face_blur' attribute set to the list of integer face boxes (as unencoded json).

    .. notes::
        - Face detection is delegated to face_detection(), which relies on the heyvi visual analytics package.
        - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.
        - To retain boxes, use self.face_detection().blurmask()
    """
    faces = self.face_detection(mindim=mindim)  # only faces
    boxes = []
    for face in faces.objects():
        boxes.append(face.int().json(encode=False))
    tagged = faces.setattribute('face_blur', boxes)
    return tagged.blurmask(radius=radius).downcast()

def face_detection(self, mindim=256, conf=0.2)

Detect faces in the scene, add as objects, return new scene with just faces

Args

mindim [int]: The minimum dimension for downsampling the image for face detection. Will be upsampled back to native resolution prior to return Returns A Scene object with all detected faces or the union of faces and all objects in self

Note: This method uses a CPU-only pretrained face detector. This is convenient, but slow. See the heyvi package for optimized GPU batch processing for faster operation.

Expand source code Browse git

def face_detection(self, mindim=256,  conf=0.2):
    """Detect faces in the scene, add as objects, return new scene with just faces

    Args:
        mindim [int]: The minimum dimension used to resize the cloned scene before it is handed to the detector.
        conf [float]: Accepted for interface compatibility; it is not used by this method body.

    Returns:
        The flushed result of running heyvi.detection.FaceDetector on a cleared `vipy.image.Scene` clone of this image.

    .. note:: This method requires the heyvi package (version 0.3.28 or later) and builds a new detector on every call.  See the heyvi package for optimized GPU batch processing for faster operation.
    """
    try_import('heyvi')
    import heyvi
    assert heyvi.version.is_at_least('0.3.28')
    detector = heyvi.detection.FaceDetector()
    scene = Scene.cast(self.clone()).clear().mindim(mindim)
    return detector(scene).flush()

def face_pixelize(self, radius=7, mindim=256)

Replace pixels for all detected faces with Scene.pixelize(), add locations of detected faces into attributes.

Args

radius [int]: The radius of pixels for vipy.image.Scene.pixelize. mindim [int]: The minimum dimension for downsampling the image for face detection. Will be upsampled prior to pixelize.

Returns

A Image object with a pixel buffer with all faces pixelized, with facepixelize attribute set in Image.metadata() showing the locations of the blurred faces.

Notes

This method uses a CPU-only pretrained torch network for face detection from the heyvi visual analytics package, which is re-initialized on each call to this method.
For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.
To retain boxes, use self.face_detection().pixelize()

Expand source code Browse git

def face_pixelize(self, radius=7, mindim=256):
    """Pixelize all detected faces with `vipy.image.Scene.pixelize`, and record the locations of the detected faces in the attributes.

    Args:
        radius [int]: The radius of pixels for `vipy.image.Scene.pixelize`
        mindim [int]: The minimum dimension for downsampling the image for face detection, passed to face_detection().

    Returns:
        The downcast() of the pixelized detection result, with the 'face_pixelize' attribute set to the list of integer face boxes (as unencoded json).

    .. notes::
        - Face detection is delegated to face_detection(), which relies on the heyvi visual analytics package.
        - For batch operations on many images, it is preferred to set up the detection network once, then calling many images sequentially.
        - To retain boxes, use self.face_detection().pixelize()
    """
    faces = self.face_detection(mindim=mindim)
    boxes = []
    for face in faces.objects():
        boxes.append(face.int().json(encode=False))
    tagged = faces.setattribute('face_pixelize', boxes)
    return tagged.pixelize(radius=radius).downcast()

def filename(self, newfile=None)

Return or set image filename

Expand source code Browse git

def filename(self, newfile=None):
    """Return the image filename, or set it when newfile is provided.

    With no argument (or None) the current filename is returned.  Otherwise the filename is replaced
    by newfile and this object is returned.
    """
    if newfile is not None:
        self._filename = newfile
        return self
    return self._filename

def filesize(self)

Return size of underlying image file, requires fetching metadata from filesystem

Expand source code Browse git

def filesize(self):
    """Return the size of the underlying image file as reported by os.path.getsize().

    Requires fetching metadata from the filesystem.  Asserts that hasfilename() is true.
    """
    assert self.hasfilename(), 'Invalid image filename'
    path = self._filename
    return os.path.getsize(path)

def fliplr(self)

Mirror the image buffer about the vertical axis - Not idempotent

Expand source code Browse git

def fliplr(self):
    """Mirror the image buffer left-to-right (about the vertical axis) and return this object.

    Not idempotent: calling it twice restores the original orientation.  Loads the image if needed.
    """
    pixels = self.load().array()
    self._array = np.fliplr(pixels)
    return self

def flipud(self)

Mirror the image buffer about the horizontal axis - Not idempotent

Expand source code Browse git

def flipud(self):
    """Mirror the image buffer top-to-bottom (about the horizontal axis) and return this object.

    Not idempotent: calling it twice restores the original orientation.  Loads the image if needed.
    """
    pixels = self.load().array()
    self._array = np.flipud(pixels)
    return self

def float(self)

Convert the image buffer to float32

Expand source code Browse git

def float(self):
    """Convert the image buffer to the 'float' colorspace (float32) by delegating to self._to_colorspace('float')."""
    converted = self._to_colorspace('float')
    return converted

def flush(self)

Flush the image buffer in place and return this object. Unlike self.clone(flush=True), no copy is made.

Expand source code Browse git

def flush(self):
    """Release the pixel buffer of this object in place and return this object.

    When the image has neither a filename nor a URL to reload from, the buffer shape
    (height, width, channels) is first stored in the '__shape' attribute.
    """
    has_source = self.hasfilename() or self.hasurl()
    if not has_source:
        shape = (self.height(), self.width(), self.channels())
        self.setattribute('__shape', shape)  # to load zeros
    self._array = None  # flushes buffer on object
    return self

def fromarray(self, data)

Alias for Image.array() with copy=True. This will set new numpy array as the pixel buffer with a numpy array copy

Expand source code Browse git

def fromarray(self, data):
    """Set the pixel buffer from a numpy array copy.

    Alias for `vipy.image.Image.array` called with copy=True; returns whatever that call returns.
    """
    result = self.array(data, copy=True)
    return result

def gain(self, g)

Elementwise multiply gain to image array. Gain should be broadcastable to array(). This forces the colorspace to 'float'. Don't use numba optimization, it is slower than native multiply

Expand source code Browse git

def gain(self, g):
    """Elementwise multiply gain to image array, and force the colorspace to 'float'.

    Args:
        g: A scalar, or an array that is broadcastable to array() (e.g. one gain per channel).

    Returns:
        This object with the float32 buffer scaled by g and colorspace 'float'.
        A scalar gain of exactly one is a no-op which returns this object untouched (no load, no colorspace change).

    .. note:: Don't use numba optimization, it is slower than native multiply
    """
    # Only a scalar gain can be tested for unity; comparing an array gain in a boolean context would raise
    if np.ndim(g) == 0 and g == 1:
        return self
    scaled = np.multiply(self.load().float().array(), g)
    # keep the buffer float32 even if the gain was float64
    return self.array(scaled.astype(np.float32, copy=False)).colorspace('float')

def get_attribute(self, k)

Return the key k in the attributes dictionary (self.attributes) if present, else None

Expand source code Browse git

def get_attribute(self, k):
    """Look up key k in the attributes dictionary (self.attributes); return its value, or None when the key is absent."""
    if k in self.attributes:
        return self.attributes[k]
    return None

def getattribute(self, k)

Return the key k in the attributes dictionary (self.attributes) if present, else None

Expand source code Browse git

def getattribute(self, k):
    """Synonym for get_attribute: return the value for key k in the attributes dictionary if present, else None."""
    value = self.get_attribute(k)
    return value

def gray(self)

Alias for greyscale()

Expand source code Browse git

def gray(self):
    """Synonym for greyscale(); returns whatever greyscale() returns."""
    converted = self.greyscale()
    return converted

def grayscale(self)

Alias for greyscale()

Expand source code Browse git

def grayscale(self):
    """Synonym for greyscale() (American spelling); returns whatever greyscale() returns."""
    converted = self.greyscale()
    return converted

def green(self)

Return green channel as a cloned single channel Image object.

These are equivalent operations if the colorspace is 'rgb' or 'rgba':

self.green() == self.channel(1)

These are equivalent operations if the colorspace is 'bgr' or 'bgra':

self.green() == self.channel(1)

Note: OpenCV returns images in BGR colorspace. Use this method to always return the desired channel by color.

Expand source code Browse git

def green(self):
    """Return green channel as a cloned single channel `vipy.image.Image` object.

    Green is the middle channel in both RGB(A) and BGR(A) channel orders, so for any of
    the colorspaces 'rgb', 'rgba', 'bgr' or 'bgra' these are equivalent operations:

    ```python
    self.green() == self.channel(1)
    ```

    Raises:
        AssertionError: if the image has fewer than three channels.
        ValueError: if the colorspace is not one of 'rgb', 'rgba', 'bgr', 'bgra'.

    .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
    """
    assert self.channels() >= 3, "Must be three channel color image"
    if self.colorspace() in ['rgb', 'rgba', 'bgr', 'bgra']:
        return self.channel(1)
    raise ValueError('Invalid colorspace "%s" does not contain green channel' % self.colorspace())

def grey(self)

Alias for greyscale()

Expand source code Browse git

def grey(self):
    """Synonym for greyscale(); returns whatever greyscale() returns."""
    converted = self.greyscale()
    return converted

def greyscale(self)

Convert the image buffer to single channel grayscale float32 in range [0,1]

Expand source code Browse git

def greyscale(self):
    """Convert the image buffer to single channel greyscale by delegating to self._to_colorspace('gray').

    Per the colorspace conventions of this class, 'grey' images are float32 in the range [0,1].
    """
    converted = self._to_colorspace('gray')
    return converted

def has_filename(self)

Return True if the image has a filename input source and this file exists

Expand source code Browse git

def has_filename(self):
    """Return True if the image has a filename input source and that path exists on the filesystem."""
    path = self._filename
    if path is None:
        return False
    return os.path.exists(path)

def has_loader(self)

Expand source code Browse git

def has_loader(self):
    """Return True if a custom loader has been set on this image (self._loader is not None)."""
    return not (self._loader is None)

def has_url(self)

Return True if the image has a URL input source

Expand source code Browse git

def has_url(self):
    """Return True if the image has a URL input source (self._url is not None)."""
    return not (self._url is None)

def hasattribute(self, key)

Expand source code Browse git

def hasattribute(self, key):
    """Return True if key is present in the attributes dictionary; False if absent or if there is no attributes dictionary."""
    if self.attributes is None:
        return False
    return key in self.attributes

def hasfilename(self)

synonym for has_filename

Expand source code Browse git

def hasfilename(self):
    """Synonym for has_filename(); returns whatever has_filename() returns."""
    present = self.has_filename()
    return present

def hasurl(self)

synonym for vipy.image.has_url

Expand source code Browse git

def hasurl(self):
    """Synonym for has_url(); returns whatever has_url() returns."""
    present = self.has_url()
    return present

def height(self)

Return the height (rows) of the image in integer pixels.

Note: This triggers a Image.load() if the image is not already loaded.

Expand source code Browse git

def height(self):
    """Return the height of the image, i.e. the number of rows of the pixel buffer.

    .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
    """
    pixels = self.load().array()
    rows = pixels.shape[0]
    return rows

def hot(self)

Apply hot colormap to greyscale image and convert to RGB

Expand source code Browse git

def hot(self):
    """Apply the 'hot' colormap to the image by delegating to self._apply_colormap('hot')."""
    mapped = self._apply_colormap('hot')
    return mapped

def hsv(self)

Convert the image buffer to three channel HSV uint8 colorspace

Expand source code Browse git

def hsv(self):
    """Convert the image buffer to the 'hsv' colorspace by delegating to self._to_colorspace('hsv')."""
    converted = self._to_colorspace('hsv')
    return converted

def html(self, alt=None, id=None, attributes={'loading': 'lazy'})

Export a base64 encoding of the image suitable for embedding in an html page, enclosed in tag

Returns

-string: containing base64 encoded JPEG and alt text with lazy loading

Expand source code Browse git

def html(self, alt=None, id=None, attributes={'loading':'lazy'}):
    """Export a base64 encoding of the image suitable for embedding in an html page, enclosed in <img> tag

       Args:
          alt: alternate text for the tag; the filename is used if None
          id: value of the id attribute; the filename is used if None, and the id attribute is omitted if the filename is None too
          attributes: dictionary of extra tag attributes, rendered as key="value" pairs at the end of the tag

       Returns:
          -string:  <img src="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" loading="lazy"> containing base64 encoded JPEG and alt text with lazy loading

       .. note:: Values are inserted verbatim and are not html-escaped.
    """
    assert isinstance(attributes, dict)
    encoded = self.base64().decode('ascii')
    if alt is None:
        alt_text = self.filename()
    else:
        alt_text = alt
    if id is None:
        id = self.filename()
    id_attr = '' if id is None else ('id="%s" ' % id)
    extra = ' '.join('%s="%s"' % (str(k), str(v)) for (k, v) in attributes.items())
    return '<img %ssrc="data:image/jpeg;charset=utf-8;base64,%s" alt="%s" %s>' % (id_attr, encoded, str(alt_text), extra)

def imagebox(self)

Return the bounding box for the image rectangle

Expand source code Browse git

def imagebox(self):
    """Return a bounding box covering the whole image rectangle: origin (0,0), image width and image height."""
    w = int(self.width())
    h = int(self.height())
    return BoundingBox(xmin=0, ymin=0, width=w, height=h)

def intensity(self)

Convert image to float32 with [min,max] to range [0,1], force colormap to be 'float'. Equivalent to self.mat2gray()

Expand source code Browse git

def intensity(self):
    """Convert image to float32 and linearly rescale the range [min,max] to [0,1], force colorspace to be 'float'.

    Every element x becomes (x - min) / (max - min).  A constant image (max == min)
    has no range to stretch and becomes all zeros instead of dividing by zero.
    See also `vipy.image.Image.mat2gray`.

    Returns:
        This object with the rescaled float32 buffer and colorspace 'float'.
    """
    img = self.load().float().array()
    lo = float(self.min())
    span = float(self.max()) - lo
    shifted = img - lo  # minimum is now zero
    self.array(shifted / span if span > 0 else shifted)
    return self.colorspace('float')

def is_downloaded(self)

Alias for Image.isdownloaded()

Expand source code Browse git

def is_downloaded(self):
    """Alias for `vipy.image.Image.isdownloaded`; returns whatever isdownloaded() returns."""
    downloaded = self.isdownloaded()
    return downloaded

def is_loaded(self)

Alias for Image.isloaded()

Expand source code Browse git

def is_loaded(self):
    """Return True if the pixel buffer is present (self._array is not None).

    Performs the same check as `vipy.image.Image.isloaded`.
    """
    return not (self._array is None)

def iscolor(self)

Color images are three channel or four channel with transparency, float32 or uint8

Expand source code Browse git

def iscolor(self):
    """Return True for color images, i.e. three channels, or four channels (color with transparency)."""
    if self.channels() == 3:
        return True
    return self.channels() == 4

def isdownloaded(self)

Does the filename returned from Image.filename() exist, meaning that the url has been downloaded to a local file?

Expand source code Browse git

def isdownloaded(self):
    """Return True if the filename returned from `vipy.image.Image.filename` is set and exists on the filesystem.

    For an image with a URL this means that the URL has been downloaded to a local file.
    """
    path = self._filename
    if path is None:
        return False
    return os.path.exists(path)

def isgrey(self)

Grey images are one channel, float32

Expand source code Browse git

def isgrey(self):
    """Return True for grey images: exactly one channel with a float32 pixel buffer."""
    if not (self.channels() == 1):
        return False
    return self.array().dtype == np.float32

def isloaded(self)

Return True if Image.load() was successful in reading the image, or if the pixels are present in Image.array().

Expand source code Browse git

def isloaded(self):
    """Return True if the pixels are present in `vipy.image.Image.array`, e.g. after a successful `vipy.image.Image.load`."""
    return not (self._array is None)

def isluminance(self)

Luminance images are one channel, uint8

Expand source code Browse git

def isluminance(self):
    """Return True for luminance images: exactly one channel with a uint8 pixel buffer."""
    if not (self.channels() == 1):
        return False
    return self.array().dtype == np.uint8

def istransparent(self)

Transparent images are four channel color images with transparency, float32 or uint8. Return true if this image contains an alpha transparency channel

Expand source code Browse git

def istransparent(self):
    """Return True if this image has four channels, i.e. a color image that carries an alpha transparency channel."""
    count = self.channels()
    return count == 4

def jet(self)

Apply jet colormap to greyscale image and save as RGB

Expand source code Browse git

def jet(self):
    """Apply the 'jet' colormap to the image by delegating to self._apply_colormap('jet')."""
    mapped = self._apply_colormap('jet')
    return mapped

def json(self, encode=True)

Expand source code Browse git

def json(self, encode=True):
    """Serialize this image to JSON.

    Args:
        encode: [bool] If true, return a JSON encoded string, otherwise return the dictionary that would be encoded.

    Returns:
        The entries of dict() whose value is not None, with the pixel array (if present) converted to nested lists.

    Raises:
        ValueError: if the attributes dictionary is rejected by vipy.util.is_jsonable.
    """
    if not vipy.util.is_jsonable(self.attributes):
        raise ValueError('attributes dictionary contains non-json elements and cannot be serialized.  Try self.clear_attributes() or self.sanitize()')

    d = {}
    for (k, v) in self.dict().items():
        if v is not None:  # filter empty
            d[k] = v

    if d.get('array') is not None:
        if self.hasfilename() or self.hasurl():
            log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() first, then reload the image from backing filename/url after json import')
        d['array'] = self._array.tolist()

    if encode:
        return json.dumps(d)
    return d

def load(self, verbose=False)

Load image to cached private '_array' attribute.

Args

verbose: [bool] If true, show additional useful printed output

Returns

This Image object with the pixels loaded in self._array as a numpy array.

Note: This loader supports any image file format supported by PIL. A custom loader can be added using Image.loader().

Expand source code Browse git

def load(self, verbose=False):
    """Load image to cached private '_array' attribute.

    Args:
        verbose: [bool] If true, show additional useful printed output

    Returns:
        This `vipy.image.Image` object with the pixels loaded in self._array as a numpy array.
        If the filename is a webp file, the result of `vipy.video.Video(filename).load()` is returned instead.

    Raises:
        ValueError: if the filename has a non-image extension and no custom loader is set, or if no filename, url or flushed shape is available
        IOError: re-raised after resetting self._array to None if the file cannot be read

    .. note:: This loader supports any image file format supported by PIL.  A custom loader can be added using `vipy.image.Image.loader`.
    """
    try:
        # Return if previously loaded image
        if self._array is not None:
            return self

        # Download URL to filename, only if there is a url and no filename yet
        if self._url is not None and not self.hasfilename():
            self.download(verbose=verbose)

        # Load filename to numpy array
        if self._loader is not None:
            # Custom loader takes precedence: self._loader is the (callable, argument) pair set by loader()
            (f,x) = self._loader
            self._array = f(x)
            # Infer the colorspace from the loaded buffer, falling back to generic float32
            if self.isluminance():
                self.colorspace('lum')
            elif self.iscolor():
                self.colorspace('rgb')
            else:
                self._array = np.float32(self._array)
                self.colorspace('float')

        elif isimagefile(self._filename):
            self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
            # Infer the colorspace from the loaded buffer, the order of these tests matters
            if self.istransparent():
                self.colorspace('rgba')  # must be before iscolor()
            elif self.iscolor():
                self.colorspace('rgb')
            elif self.isgrey():
                self.colorspace('grey')
            elif self.isluminance():
                self.colorspace('lum')
            else:
                log.warning('unknown colorspace for image "%s" - attempting to coerce to colorspace=float' % str(self._filename))
                self._array = np.float32(self._array)
                self.colorspace('float')
        elif iswebp(self._filename):
            # NOTE(review): this branch returns the loaded Video object rather than self - confirm callers expect this
            import vipy.video
            return vipy.video.Video(self._filename).load()
        elif self.hasfilename() and hasextension(self._filename):
            raise ValueError('Non-standard image extensions require a custom loader')
        elif self.hasfilename():
            # Attempting to open it anyway, may be an image file without an extension. Cross your fingers ...
            # NOTE: no colorspace is assigned in this branch
            self._array = np.array(PIL.Image.open(self._filename))  # RGB order!
        elif not self.hasfilename() and self.hasattribute('__shape'):
            # Loading a previously flushed buffer, load zeros so that we can display superclass objects
            self._array = np.zeros( self.getattribute('__shape') )
            self.delattribute('__shape')  # the stored shape is consumed by this load
        else:
            raise ValueError('image file not defined')
        
    except IOError:
        # Leave the object in the unloaded state and propagate the error to the caller
        if verbose is True:
            log.error('IO error loading "%s" ' % self.filename())
        self._array = None
        raise

    except KeyboardInterrupt:
        # Propagate user interrupts unchanged, without resetting the buffer
        raise

    except Exception:
        # Any other failure: leave the object in the unloaded state and propagate
        if verbose is True:
            log.error('Load error for image "%s"' % self.filename())
        self._array = None
        raise

    return self

def loaded(self)

Alias for Image.isloaded()

Expand source code Browse git

def loaded(self):
    """Alias for `vipy.image.Image.isloaded`.  Return true if the private pixel buffer self._array has been set"""
    return not (self._array is None)

def loader(self, f, x=None)

Lambda function to load an unsupported image filename to a numpy array.

This lambda function will be executed during load and the result will be stored in self._array

Expand source code Browse git

def loader(self, f, x=None):
    """Register a callable used to load an unsupported image filename to a numpy array.

    The callable will be executed as f(x) during load and the result will be stored in self._array

    Args:
        f: [callable] The loader function, or None to remove a previously registered loader
        x: The argument passed to f.  If None, the current filename of this image is used

    Returns:
        This object, with self._loader set to the tuple (f, x), or to None if f is None
    """
    if f is None:
        self._loader = None
    else:
        argument = self.filename() if x is None else x
        self._loader = (f, argument)
    return self

def lum(self)

Alias for luminance()

Expand source code Browse git

def lum(self):
    """Alias for luminance(), converts the image buffer to the 'lum' colorspace"""
    target = 'lum'
    return self._to_colorspace(target)

def luminance(self)

Convert the image buffer to single channel uint8 in range [0,255] corresponding to the luminance component

Expand source code Browse git

def luminance(self):
    """Convert the image buffer to the 'lum' colorspace: single channel uint8 in range [0,255] corresponding to the luminance component"""
    target = 'lum'
    return self._to_colorspace(target)

def map(self, func)

Apply lambda function to our numpy array img, such that newimg=f(img), then replace newimg -> self.array(). The output of this lambda function must be a numpy array and if the channels or dtype changes, the colorspace is set to 'float'

Expand source code Browse git

def map(self, func):
    """Apply a function to our numpy array img, such that newimg=func(img), then replace newimg -> self.array().

    Args:
        func: A lambda (or any python function) that accepts a numpy array and returns a numpy array

    Returns:
        This object with the pixel buffer replaced by the output of func.  If the dtype or shape of the output differs from the input, the colorspace is set to 'float'
    """
    assert isinstance(func, types.LambdaType), "Input must be lambda function (e.g. f = lambda img: 255.0-img)"
    before = self.array()  # reference
    after = func(self.array())  # in-place
    assert isnumpy(after), "Lambda function output must be numpy array"
    self.array(after)  # reference
    unchanged = (after.dtype == before.dtype) and (after.shape == before.shape)
    if not unchanged:
        self.colorspace('float')  # unknown colorspace after transformation, set generic
    return self

def mat2gray(self, min=None, max=None)

Convert the image buffer so that [min,max] -> [0,1], forces conversion to 'float' colorspace. This does not change the number of color channels

Expand source code Browse git

def mat2gray(self, min=None, max=None):
    """Convert the image buffer so that [min,max] -> [0,1], forces conversion to 'float' colorspace.  This does not change the number of color channels

    Args:
        min: The pixel value mapped to zero, passed through to vipy.util.mat2gray
        max: The pixel value mapped to one, passed through to vipy.util.mat2gray

    Returns:
        The result of self.colorspace('float') after the pixel buffer has been replaced by the rescaled float32 array
    """
    # NOTE: inside this method body, mat2gray resolves to the module level function imported from vipy.util, not to this method
    rescaled = mat2gray(np.float32(self.load().float().array()), min, max)
    self.array(rescaled)
    return self.colorspace('float')

def max(self)

Expand source code Browse git

def max(self):
    """Alias for maxpixel()"""
    largest = self.maxpixel()
    return largest

def maxdim(self, dim=None, interp='bilinear')

Resize image preserving aspect ratio so that maximum dimension of image = dim, or return maxdim()

Expand source code Browse git

def maxdim(self, dim=None, interp='bilinear'):
    """Resize image preserving aspect ratio so that maximum dimension of image = dim, or return the current maximum dimension if dim is None"""
    if dim is None:
        return max(self.shape())
    largest = np.maximum(self.height(), self.width())
    scale = float(dim) / float(largest)
    return self.rescale(scale, interp=interp)

def maxmatte(self)

Crop image of size (HxW) to (max(H,W), max(H,W)) with balanced zeropadding forming a letterbox with top/bottom matte or pillarbox with left/right matte

Expand source code Browse git

def maxmatte(self):
    """Pad image of size (HxW) to (max(H,W), max(H,W)) with balanced zeropadding forming a letterbox with top/bottom matte or pillarbox with left/right matte

    Returns:
        The result of zeropadding this image to a square of side max(H,W), followed by a crop to that square
    """
    S = np.max(self.load().shape())
    (dW, dH) = (int(S - self.width()), int(S - self.height()))
    # Split each difference so that (pre + post) padding equals the full difference.
    # For an odd difference, the extra pixel goes to the right/bottom.  Taking floor and ceil
    # of an integer division (dW//2) would give equal pads and leave the result one pixel short.
    (left, top) = (dW // 2, dH // 2)
    (right, bottom) = (dW - left, dH - top)
    return self.zeropad((left, right), (top, bottom))._crop(BoundingBox(0, 0, width=int(S), height=int(S)))

def maxpixel(self)

Expand source code Browse git

def maxpixel(self):
    """Return the maximum value over all pixels and channels, loading the image if needed"""
    pixels = self.load().array()
    return np.max(pixels.flatten())

def maxsquare(self, S=None)

Crop image of size (HxW) to (max(H,W), max(H,W)) with zeropadding or (S,S) if provided, keeping upper left corner constant

Expand source code Browse git

def maxsquare(self, S=None):
    """Crop image of size (HxW) to (max(H,W), max(H,W)) with zeropadding or (S,S) if provided, keeping upper left corner constant

    Args:
        S: [int] The side length of the output square.  If None, use the maximum dimension of the loaded image

    Returns:
        This object, cropped and/or zero padded on the right and bottom to (S,S).  The image is returned unmodified if it is already (S,S)
    """
    side = int(S) if S is not None else np.max(self.load().shape())
    (rows, cols) = self.shape()
    if side == cols and side == rows:
        return self  # already the requested square

    # crop then zeropad
    self._crop(BoundingBox(0, 0, width=min(cols, side), height=min(rows, side)))
    (padright, padbottom) = (max(0, side - cols), max(0, side - rows))
    if padright > 0 or padbottom > 0:
        self.zeropad((0, padright), (0, padbottom))
    return self

def mean(self)

Mean over all pixels

Expand source code Browse git

def mean(self):
    """Mean over all pixels and channels, loading the image if needed"""
    pixels = self.load().array()
    return np.mean(pixels.flatten())

def meanchannel(self, k=None)

Mean per channel over all pixels. If channel k is provided, return just the mean for that channel

Expand source code Browse git

def meanchannel(self, k=None):
    """Mean per channel over all pixels.  If channel k is provided, return just the mean for that channel"""
    # average over the first two axes (rows, columns) of the pixel buffer
    permean = np.mean(self.load().array(), axis=(0, 1)).flatten()
    if k is None:
        return permean
    return permean[k]

def meanpad(self, padwidth, padheight, mu=None)

Pad image using np.pad constant=image mean by adding padwidth on both left and right, or padwidth=(left,right) for different pre/post padding, and padheight on top and bottom or padheight=(top,bottom) for different pre/post padding

Expand source code Browse git

def meanpad(self, padwidth, padheight, mu=None):
    """Pad image using np.pad with a constant fill equal to the per-channel image mean.

    Args:
        padwidth: Padding added on both left and right, or a tuple (left,right) for different pre/post padding
        padheight: Padding added on both top and bottom, or a tuple (top,bottom) for different pre/post padding
        mu: An iterable of per-channel fill values.  If None, use self.meanchannel()

    Returns:
        This object with self._array replaced by the padded buffer
    """
    leftright = padwidth if isinstance(padwidth, tuple) else (padwidth, padwidth)
    topbottom = padheight if isinstance(padheight, tuple) else (padheight, padheight)
    assert all([x>=0 for x in topbottom]) and all([x>=0 for x in leftright]), "padding must be positive"
    fill = self.meanchannel() if mu is None else mu

    # Pad each channel independently with its own constant, then restack along the channel axis
    padded = []
    for (img, c) in zip(self.channel(), fill):
        padded.append(np.pad(img, pad_width=(topbottom, leftright), mode='constant', constant_values=c))
    self._array = np.squeeze(np.dstack(padded))
    return self

def metadata(self, k=None)

Return metadata associated with this image, stored in the attributes dictionary

Expand source code Browse git

def metadata(self, k=None):
    """Return metadata associated with this image, stored in the attributes dictionary.  If key k is provided, return self.getattribute(k) instead of the whole dictionary"""
    if k is None:
        return self.attributes
    return self.getattribute(k)

def min(self)

Expand source code Browse git

def min(self):
    """Alias for minpixel()"""
    smallest = self.minpixel()
    return smallest

def mindim(self, dim=None, interp='bilinear')

Resize image preserving aspect ratio so that minimum dimension of image = dim, or return mindim()

Expand source code Browse git

def mindim(self, dim=None, interp='bilinear'):
    """Resize image preserving aspect ratio so that minimum dimension of image = dim, or return the current minimum dimension if dim is None

    Args:
        dim: [int] The target size of the minimum dimension, or None to query the current minimum dimension
        interp: [str] Interpolation method passed through to rescale()

    Returns:
        The minimum of (height, width) if dim is None, otherwise the result of self.rescale()
    """
    smallest = np.minimum(self.height(), self.width())
    if dim is None:
        return smallest
    return self.rescale(float(dim) / float(smallest), interp=interp)

def mindimn(self, dim=None)

Frequently used shortcut for mindim(dim, interp='nearest')

Expand source code Browse git

def mindimn(self, dim=None):
    """Frequently used shortcut for mindim(dim, interp='nearest')"""
    interpolation = 'nearest'
    return self.mindim(dim, interp=interpolation)

def minpixel(self)

Expand source code Browse git

def minpixel(self):
    """Return the minimum value over all pixels and channels, loading the image if needed"""
    pixels = self.load().array()
    return np.min(pixels.flatten())

def minsquare(self)

Crop image of size (HxW) to (min(H,W), min(H,W)), keeping upper left corner constant

Expand source code Browse git

def minsquare(self):
    """Crop image of size (HxW) to (min(H,W), min(H,W)), keeping upper left corner constant"""
    side = int(np.min(self.load().shape()))
    square = BoundingBox(xmin=0, ymin=0, width=side, height=side)
    return self._crop(square)

def normalize(self, gain, bias)

Apply a multiplicative gain g and additive bias b, such that self.array() == gain*self.array() + bias.

This is useful for applying a normalization of an image prior to calling Image.torch().

The following operations are equivalent.

im = vipy.image.RandomImage()
im.normalize(1/255.0, 0.5) == im.gain(1/255.0).bias(-0.5)

Note: This will force the colorspace to 'float'

Expand source code Browse git

def normalize(self, gain, bias):
    """Apply a multiplicative gain and additive bias, such that the new pixel buffer is gain*self.array() + bias.

    This is useful for applying a normalization of an image prior to calling `vipy.image.Image.torch`.

    ```python
    im = vipy.image.RandomImage()
    im.normalize(1/255.0, 0.5)   # pixels become (1/255.0)*pixels + 0.5
    ```

    Args:
        gain: The multiplicative factor applied to the float pixel buffer
        bias: The additive offset applied after the gain

    Returns:
        This object with the transformed pixel buffer

    .. note:: This will force the colorspace to 'float'
    """
    pixels = self.load().float().array()
    transformed = self.array(gain*pixels + bias)
    return transformed.colorspace('float')

def numpy(self)

Return a mutable numpy array for this Image.

Notes

This will always return a writeable array with the 'WRITEABLE' numpy flag set. This is useful for returning a mutable numpy array as needed while keeping the original non-mutable numpy array (e.g. loaded from a video or PIL) as the underlying pixel buffer for efficiency reasons.
Triggers a Image.load() if the pixel buffer has not been loaded
This will trigger a copy if the 'WRITEABLE' flag is not set.

Expand source code Browse git

def numpy(self):
    """Return a mutable numpy array for this `vipy.image.Image`.

    .. notes:: 
        - The returned array always has the 'WRITEABLE' numpy flag set.  This allows a non-mutable numpy array (e.g. loaded from a video or PIL) to remain the underlying pixel buffer until a mutable array is actually needed.
        - Triggers a `vipy.image.Image.load` if the pixel buffer has not been loaded
        - If the ['WRITEABLE' flag](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html) is not set, the pixel buffer is replaced by a copy.
    """        
    self.load()
    if not self._array.flags['WRITEABLE']:
        self._array = np.copy(self._array)  # triggers copy
    return self._array

def pad(self, padwidth, padheight)

Alias for Image.zeropad()

Expand source code Browse git

def pad(self, padwidth, padheight):
    """Alias for `vipy.image.Image.zeropad`, forwarding padwidth and padheight unchanged"""
    padded = self.zeropad(padwidth, padheight)
    return padded

def padcrop(self, bbox)

Crop the image buffer using the supplied bounding box object, zero padding if box is outside image rectangle, update all scene objects

Expand source code Browse git

def padcrop(self, bbox):
    """Crop the image buffer using the supplied bounding box object, zero padding if box is outside image rectangle, update all scene objects"""
    # How far the box extends past the image rectangle along each axis (zero if fully inside)
    overflow_x = max(0-bbox.xmin(), bbox.xmax()-self.width())
    overflow_y = max(0-bbox.ymin(), bbox.ymax()-self.height())
    (dx, dy) = (int(max(0, overflow_x)), int(max(0, overflow_y)))
    # Pad by the overflow, then crop with the box shifted into the padded coordinate frame
    shifted = bbox.translate(dx=dx, dy=dy)
    return self.zeropad(dx, dy)._crop(shifted)

def perceptualhash(self, bits=128, asbinary=False, asbytes=False)

Perceptual differential hash function

This function converts to greyscale, resizes with linear interpolation to small image based on desired bit encoding, compute vertical and horizontal gradient signs.

Args

bits: [int] longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
asbinary: [bool] If true, return a binary array
asbytes: [bool] if true return a byte array

Returns

A hash string encoding the perceptual hash such that Image.perceptualhash_distance() can be used to compute a hash distance
asbytes: a bytes array
asbinary: a numpy binary array

Notes

Can be used for near duplicate detection by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing. Equivalently, Image.perceptualhash_distance().
The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex(h))

Expand source code Browse git

def perceptualhash(self, bits=128, asbinary=False, asbytes=False):
    """Perceptual differential hash function

    This function works on a clone of this image: it resizes the clone to a small square image based on the desired bit encoding, converts to greyscale, then encodes the signs of the vertical and horizontal gradients.
    
    Args:
        bits: [int]  Must be of the form 2*k*k for k in [2,16].  Longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
        asbinary: [bool] If true, return a binary array
        asbytes: [bool] if true return a byte array.  Takes precedence over asbinary

    Returns:
        A hash string encoding the perceptual hash such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
        asbytes: a bytes array
        asbinary: a numpy binary array            

    .. notes::
        - Can be used for near duplicate detection by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.  Equivalently, `vipy.image.Image.perceptualhash_distance`.
        - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex(h))
    """        
    allowablebits = [2*k*k for k in range(2, 17)]
    assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
    side = int(np.ceil(np.sqrt(bits/2.0)))

    # One extra row and column so that dropping the last row/column of the gradients leaves (side x side x 2) signs
    small = self.clone().resize(cols=side+1, rows=side+1).greyscale().numpy()
    gradients = np.dstack(np.gradient(small))
    signs = (gradients[0:-1, 0:-1] > 0).flatten()

    if asbytes:
        return bytes(np.packbits(signs))
    if asbinary:
        return signs
    return bytes(np.packbits(signs)).hex()

def person_detection(self, mindim=256, conf=0.2)

Detect only people in the scene, add as objects, return new scene with just people

Args

mindim [int]: The minimum dimension for downsampling the image for person detection. Will be upsampled back to native resolution prior to return. conf [float]: A real value between [0,1] of the minimum confidence for person detection. Returns: A Scene object with all detected people or the union of people and all objects in self.

Note: This method uses a CPU-only pretrained person detector. This is convenient, but slow. See the heyvi package for optimized GPU batch processing for faster operation.

Expand source code Browse git

def person_detection(self, mindim=256, conf=0.2):
    """Detect only people in the scene, add as objects, return new scene with just people

    Args:
        mindim [int]: The minimum dimension that a clone of this image is resized to before it is passed to the detector
        conf [float]: A real value between [0,1] of the minimum confidence for person detection

    Returns
        The flushed output of the heyvi object detector run on a cleared `vipy.image.Scene` clone of this image, restricted to the 'person' category
    
    .. note:: This method uses a CPU-only pretrained person detector.  This is convenient, but slow.  See the heyvi package for optimized GPU batch processing for faster operation.
    """
    try_import('heyvi')
    import heyvi
    assert heyvi.version.is_at_least('0.3.28')

    detector = heyvi.detection.ObjectDetector()
    # Detect on a cleared clone, so this object is not modified
    scene = Scene.cast(self.clone()).clear().mindim(mindim)
    return detector(scene, conf=conf, objects=['person']).flush()

def pil(self)

Convert vipy.image.Image to PIL Image.

Returns

A PIL image object, that shares the pixel buffer by reference

Expand source code Browse git

def pil(self):
    """Convert vipy.image.Image to PIL Image.
    
    Returns:
        A [PIL image](https://pillow.readthedocs.io/en/stable/reference/Image.html) object.  If the image is loaded, this is created from the pixel buffer, otherwise it is opened from the filename.  Returns None if the image is not loaded and has no filename.
    """
    if not self.isloaded():
        # No pixel buffer yet: fall back on the backing file if there is one
        return PIL.Image.open(self.filename()) if self.hasfilename() else None

    assert self.channels() in [1,3,4] and (self.channels() == 1 or self.colorspace() != 'float'), "Incompatible with PIL"
    pixels = self.numpy()
    mode = 'RGB' if self.colorspace()=='rgb' else None
    return PIL.Image.fromarray(pixels, mode=mode)  # FIXME: mode='RGB' triggers slow tobytes() conversion, need RGBA or RGBX

def pkl(self, pklfile=None)

save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains

Expand source code Browse git

def pkl(self, pklfile=None):
    """Save the object to a pickle file and return the object, useful for intermediate saving in long fluent chains.

    Args:
        pklfile: [str] The output pickle filename.  If None, use the image filename with the extension replaced by '.pkl'

    Returns:
        This object
    """
    assert pklfile is not None or self.filename() is not None
    if pklfile is None:
        pklfile = toextension(self.filename(), '.pkl')
    outdir = vipy.util.filepath(pklfile)
    remkdir(outdir)
    vipy.util.save(self, pklfile)
    return self

def pklif(self, b, pklfile=None)

Save the object to the provided pickle file only if b=True. Useful for conditional intermediate saving in long fluent chains

Expand source code Browse git

def pklif(self, b, pklfile=None):
    """Save the object to the provided pickle file only if b=True. Useful for conditional intermediate saving in long fluent chains"""
    assert isinstance(b, bool)
    if not b:
        return self
    return self.pkl(pklfile)

def print(self, prefix='', sleep=None)

Print the representation of the image and return self with an optional sleep=n seconds

Useful for debugging or sequential visualization in long fluent chains.

Expand source code Browse git

def print(self, prefix='', sleep=None):
    """Print the representation of the image and return self with an optional sleep=n seconds
    
    Useful for debugging or sequential visualization in long fluent chains.

    Args:
        prefix: [str] A string prepended to the printed representation
        sleep: [number] If not None, sleep for this non-negative number of seconds after printing

    Returns:
        This object
    """
    print(prefix+self.__repr__())
    if sleep is not None:
        # zero is a valid non-negative duration, consistent with the assertion message
        assert sleep >= 0, "Sleep must be a non-negative number of seconds"
        time.sleep(sleep)
    return self

def rainbow(self)

Apply rainbow colormap to greyscale image and convert to RGB

Expand source code Browse git

def rainbow(self):
    """Apply the 'gist_rainbow' colormap to a greyscale image and convert to RGB"""
    colormap = 'gist_rainbow'
    return self._apply_colormap(colormap)

def recenter(self, p)

Recenter the image so that point p=(x=col, y=row) in the current image is in the middle of the new image, zeropad to (width, height).
This is useful to implement a 'saccade', under the small angle assumption, where a rotation is approximated by a translation

Expand source code Browse git

def recenter(self, p):
    """Recenter the image so that point p=(x=col, y=row) in the current image is in the middle of the new image, zeropad to (width, height).  
       This is useful to implement a 'saccade', under the small angle assumption, where a rotation is approximated by a translation
    """        
    # Move the image rectangle so that it is centered on p, then crop with zero padding
    centered = self.imagebox().centroid(p)
    return self.padcrop(centered)

def red(self)

Return red channel as a cloned single channel Image object.

These are equivalent operations if the colorspace is 'rgb' or 'rgba':

self.red() == self.channel(0)

These are equivalent operations if the colorspace is 'bgr' or 'bgra':

self.red() == self.channel(2)

Note: OpenCV returns images in BGR colorspace. Use this method to always return the desired channel by color.

Expand source code Browse git

def red(self):
    """Return red channel as a cloned single channel `vipy.image.Image` object.

    These are equivalent operations if the colorspace is 'rgb' or 'rgba':
    
    ```python
    self.red() == self.channel(0) 
    ```

    These are equivalent operations if the colorspace is 'bgr' or 'bgra':

    ```python
    self.red() == self.channel(2) 
    ```

    Raises:
        AssertionError: if the image has fewer than three channels
        ValueError: if the colorspace is not one of 'rgb', 'rgba', 'bgr', 'bgra'

    .. note:: OpenCV returns images in BGR colorspace.  Use this method to always return the desired channel by color.
    """
    assert self.channels() >= 3, "Must be color image"
    if self.colorspace() in ['rgb', 'rgba']:
        return self.channel(0)
    elif self.colorspace() in ['bgr', 'bgra']:
        # zero-indexed channel order is (B=0, G=1, R=2[, A=3]), so red is index 2
        return self.channel(2)
    else:
        raise ValueError('Invalid colorspace "%s" does not contain red channel' % self.colorspace())

def reload(self)

Flush the image buffer to force reloading from file or URL

Expand source code Browse git

def reload(self):
    """Flush the image buffer to force reloading from file or URL.  Returns a loaded clone created with flush=True"""
    flushed = self.clone(flush=True)
    return flushed.load()

def relpath(self, parent=None)

Replace the filename with a relative path to parent (or current working directory if none)

Expand source code Browse git

def relpath(self, parent=None):
    """Replace the filename with a relative path to parent (or current working directory if none)

    Args:
        parent: [str] The directory that the filename is made relative to.  If None, use os.getcwd()

    Returns:
        The result of self.filename(newpath), where newpath is the relative path as a string

    Raises:
        AssertionError: if parent is not contained in the user-expanded filename
        ValueError: raised by PurePath.relative_to if the filename is not located under parent
    """
    from pathlib import PurePath  # local import: PurePath is not among the imports visible at the top of this module

    parent = parent if parent is not None else os.getcwd()
    expanded = os.path.expanduser(self.filename())
    assert parent in expanded, "Parent path '%s' not found in abspath '%s'" % (parent, self.filename())
    # Convert to str so that the stored filename is a plain string rather than a PurePath object
    return self.filename(str(PurePath(expanded).relative_to(parent)))

def rescale(self, scale=1, interp='bilinear', fast=False)

Scale the image buffer by the given factor - NOT idempotent

Expand source code Browse git

def rescale(self, scale=1, interp='bilinear', fast=False):
    """Scale the image buffer by the given factor - NOT idempotent

    Args:
        scale: [float] The multiplicative scale factor applied to both width and height.  A scale of one returns the loaded image unmodified
        interp: [str] Interpolation method name, converted with string_to_pil_interpolation
        fast: [bool] If true, pass reducing_gap=2 to the PIL resize.  Not used for the 'float' colorspace

    Returns:
        This object with the pixel buffer resized by PIL
    """
    (height, width) = self.load().shape()
    if scale == 1:
        return self

    newsize = (int(np.round(scale * width)), int(np.round(scale * height)))  # PIL size order is (width, height)
    if self.colorspace() == 'float':
        # float images are resized one channel at a time, then restacked
        channels = [np.asarray(im.pil().resize(newsize, string_to_pil_interpolation(interp))) for im in self.channel()]
        self._array = np.dstack(channels)
    else: 
        resized = self.pil().resize(newsize, string_to_pil_interpolation(interp), reducing_gap=2 if fast else None)
        self._array = np.asarray(resized)
    return self

def resize(self, cols=None, rows=None, width=None, height=None, interp='bilinear', fast=False)

Resize the image buffer to (rows x cols) with bilinear interpolation. If rows or cols is provided, rescale image maintaining aspect ratio

Expand source code Browse git

def resize(self, cols=None, rows=None, width=None, height=None, interp='bilinear', fast=False):
    """Resize the image buffer to (rows x cols).  If only one of rows or cols is provided, rescale image maintaining aspect ratio

    Args:
        cols: [int] The output width in pixels.  Equivalent to width, define only one
        rows: [int] The output height in pixels.  Equivalent to height, define only one
        width: [int] Alias for cols
        height: [int] Alias for rows
        interp: [str] Interpolation method name, converted with string_to_pil_interpolation
        fast: [bool] If true, pass reducing_gap=2 to the PIL resize.  Not used for the 'float' colorspace when both dimensions are given

    Returns:
        This object with the resized pixel buffer.  The image is returned unmodified if it is already the requested size
    """
    assert not (cols is not None and width is not None), "Define either width or cols"
    assert not (rows is not None and height is not None), "Define either height or rows"
    rows = rows if height is None else height
    cols = cols if width is None else width
    if cols is None or rows is None:
        if cols is None:
            scale = float(rows) / float(self.height())
        else:
            scale = float(cols) / float(self.width())
        # forward the interpolation options, so that they are honored for aspect preserving resizes too
        self.rescale(scale, interp=interp, fast=fast)
    elif rows == self.height() and cols == self.width():
        return self  
    elif self.colorspace() == 'float':
        # float images are resized one channel at a time, then restacked
        self._array = np.dstack([np.array(im.pil().resize((cols, rows), string_to_pil_interpolation(interp))) for im in self.channel()])
    else:
        self._array = np.asarray(self.load().pil().resize((cols, rows), string_to_pil_interpolation(interp), reducing_gap=2 if fast else None))  
    return self

def resize_like(self, im, interp='bilinear')

Resize image buffer to be the same size as the provided vipy.image.Image()

Expand source code Browse git

def resize_like(self, im, interp='bilinear'):
    """Resize image buffer to be the same (width, height) as the provided vipy.image.Image()"""
    assert isinstance(im, Image), "Invalid input - Must be vipy.image.Image"
    (cols, rows) = (im.width(), im.height())
    return self.resize(cols, rows, interp=interp)

def restore(self, filename)

Save the currently stored image to filename, and set up filename

Expand source code Browse git

def restore(self, filename):
    """Write the image bytes stored in the '__image__' attribute to filename, then set this as the image filename.

    Args:
        filename: [str] The output path, opened for binary writing

    Returns:
        The result of self.filename(filename)

    Raises:
        AssertionError: if there is no '__image__' attribute
    """
    assert self.hasattribute('__image__'), "Image not stored"
    with open(filename, 'wb') as outfile:
        payload = self.attributes['__image__']
        outfile.write(payload)
    return self.filename(filename)

def rgb(self)

Convert the image buffer to three channel RGB uint8 colorspace

Expand source code Browse git

def rgb(self):
    """Convert the image buffer to the 'rgb' colorspace: three channel RGB uint8"""
    target = 'rgb'
    return self._to_colorspace(target)

def rgba(self)

Convert the image buffer to four channel RGBA uint8 colorspace

Expand source code Browse git

def rgba(self):
    """Convert the image buffer to the 'rgba' colorspace: four channel RGBA uint8"""
    target = 'rgba'
    return self._to_colorspace(target)

def rot90ccw(self)

Rotate the scene 90 degrees counterclockwise

Expand source code Browse git

def rot90ccw(self):
    """Rotate the scene 90 degrees counterclockwise"""
    rotated = np.rot90(self.numpy(), 1)  # one quarter turn counterclockwise
    self.array(rotated)
    return self

def rot90cw(self)

Rotate the scene 90 degrees clockwise

Expand source code Browse git

def rot90cw(self):
    """Rotate the scene 90 degrees clockwise"""
    rotated = np.rot90(self.numpy(), 3)  # three counterclockwise quarter turns == one clockwise
    self.array(rotated)
    return self

def rotate(self, r)

Apply a rotation in radians to the pixels, with origin in upper left

Expand source code Browse git

def rotate(self, r):
    """Apply a rotation in radians to the pixels, with origin in upper left """
    A = vipy.geometry.affine_transform(r=r)  # rotation-only affine transform
    return self.affine_transform(A)

def rotate_by_exif(self)

Apply a rotation as specified in the 'Orientation' field EXIF metadata

Expand source code Browse git

def rotate_by_exif(self):
    """Apply a rotation as specified in the 'Orientation' field EXIF metadata.

    Returns:
        This object after applying the flips and 90 degree rotations for orientation codes 2-8.  The image is returned unmodified if there is no 'Orientation' field or the orientation is 1.

    Raises:
        ValueError: if the orientation is not None and is not a code in 1-8
    """
    exif = self.exif()
    orientation = exif['Orientation'] if 'Orientation' in exif else None
    if orientation is None or orientation == 1:
        return self

    # (orientation code, transform) pairs, tested in order
    transforms = (
        (2, lambda im: im.fliplr()),
        (3, lambda im: im.flipud().fliplr()),
        (4, lambda im: im.flipud()),
        (5, lambda im: im.rot90cw().fliplr()),
        (6, lambda im: im.rot90cw()),
        (7, lambda im: im.rot90ccw().fliplr()),
        (8, lambda im: im.rot90ccw()),
    )
    for (code, transform) in transforms:
        if orientation == code:
            return transform(self)
    raise ValueError

def sanitize(self)

Remove all private keys from the attributes dictionary.

The attributes dictionary is useful storage for arbitrary (key,value) pairs. However, this storage may contain sensitive information that should be scrubbed from the media before serialization. As a general rule, any key that is of the form '__keyname' prepended by two underscores is a private key. This is analogous to private or reserved attributes in the python language. Users should reserve these keynames for those keys that should be sanitized and removed before any serialization of this object.

assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False

Expand source code Browse git

def sanitize(self):
    """Remove all private keys from the attributes dictionary.
    
    The attributes dictionary is useful storage for arbitrary (key,value) pairs.  However, this storage may contain sensitive information that should be scrubbed from the media before serialization.  As a general rule, any key that is of the form '__keyname' prepended by two underscores is a private key.  This is analogous to private or reserved attributes in the python language.  Users should reserve these keynames for those keys that should be sanitized and removed before any serialization of this object.
    
    ```python
    assert self.setattribute('__mykey', 1).sanitize().hasattribute('__mykey') == False
    ```

    Returns:
        This object, with the attributes replaced by a new dictionary without the private keys.  Attributes that are not a dictionary are left unchanged.
    """
    if isinstance(self.attributes, dict):
        public = ((k, v) for (k, v) in self.attributes.items() if not k.startswith('__'))
        self.attributes = dict(public)
    return self

def saturate(self, min, max)

Saturate the image buffer to be clipped between [min,max], types of min/max are specified by _array type

Expand source code Browse git

def saturate(self, min, max):
    """Saturate the image buffer to be clipped between [min,max], types of min/max are specified by _array type"""
    pixels = self.load().array()
    floored = np.maximum(pixels, min)  # raise everything below min
    clipped = np.minimum(floored, max)  # lower everything above max
    return self.array(clipped)

def save(self, filename=None, quality=75)

Save the current image to a new filename and return the image object. Resets edit history

Expand source code Browse git

def save(self, filename=None, quality=75):
    """Save the current image to a new filename and return the image object.  Resets edit history

    Args:
        filename: [str] The output filename.  If None, a temporary JPEG filename is used
        quality: [int] Passed through to saveas()

    Returns:
        This object, with the filename set to the saved file, the custom loader removed and the pixel buffer flushed
    """
    outfile = tempjpg() if filename is None else filename
    written = self.saveas(outfile, quality=quality)
    return self.filename(written).loader(None).flush_array()

def saveas(self, filename=None, writeas=None, quality=75)

Save current buffer (not including drawing overlays) to new filename and return filename. If filename is not provided, use a temporary JPEG filename.

Expand source code Browse git

def saveas(self, filename=None, writeas=None, quality=75):
    """Write the current pixel buffer (without drawing overlays) to a file and return the filename.

    Args:
        filename: destination path.  If None, a temporary JPEG filename from tempjpg() is used.
        writeas: forwarded to imwrite() for non-gray images
        quality: forwarded to the image writer

    Returns:
        The filename that was written.

    Raises:
        ValueError: if the colorspace is 'float', which must be converted to RGB or gray first.
    """
    if filename is None:
        filename = tempjpg()

    space = self.colorspace()
    if space == 'float':
        raise ValueError('Convert float image to RGB or gray first. Try self.mat2gray()')

    if space == 'gray':
        imwritegray(self.grayscale()._array, filename, quality=quality)
    else:
        imwrite(self.load().array(), filename, writeas=writeas, quality=quality)
    return filename

def saveastmp(self)

Save current buffer to temp JPEG filename and return filename. Alias for savetmp()

Expand source code Browse git

def saveastmp(self):
    """Write the current buffer to a new temporary JPEG and return that filename.  Alias for savetmp()"""
    tmpfile = tempjpg()
    return self.saveas(tmpfile)

def savefig(self, filename=None, figure=1, timestamp=None, theme='dark', mutator=None)

Render the figure output of self.show() with drawing overlays, optionally save it to the provided filename, and return the rendered figure as an RGBA image

Expand source code Browse git

def savefig(self, filename=None, figure=1, timestamp=None, theme='dark', mutator=None):
    """Render this image with drawing overlays using self.show() and return the rendered figure as an image.

    Args:
        filename: if not None, the rendered figure is converted to RGB and written to this path (user and relative paths are expanded)
        figure: the matplotlib figure number used for rendering.  This figure is closed afterwards.
        timestamp: forwarded to self.show()
        theme: forwarded to self.show()
        mutator: forwarded to self.show()

    Returns:
        A `vipy.image.Image` in 'rgba' colorspace containing the pixels of the rendered figure.
    """
    self.show(figure=figure, nowindow=True, timestamp=timestamp, theme=theme, mutator=mutator)  # sets figure dimensions, does not display window
    canvas = plt.figure(figure).canvas  # read back the same figure that was just drawn, not a hard-coded figure 1
    (W,H) = canvas.get_width_height()  # fast
    buf = io.BytesIO()
    canvas.print_raw(buf)  # fast
    img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))  # RGBA
    vipy.show.close(figure)
    t = vipy.image.Image(array=img, colorspace='rgba')
    if filename is not None:
        t.rgb().saveas(os.path.abspath(os.path.expanduser(filename)))
    return t

def savetmp(self)

Save current buffer to temp JPEG filename and return filename. Alias for saveastmp()

Expand source code Browse git

def savetmp(self):
    """Write the current buffer to a temporary JPEG and return that filename.   Alias for saveastmp()"""
    tmpfile = self.saveastmp()
    return tmpfile

def set_attribute(self, key, value)

Set element self.attributes[key]=value

Expand source code Browse git

def set_attribute(self, key, value):
    """Assign self.attributes[key]=value, creating the attributes dictionary if it is None, and return this object"""
    if self.attributes is not None:
        self.attributes[key] = value
        return self
    # No attributes yet: start a new dictionary holding just this pair
    self.attributes = {key: value}
    return self

def setattribute(self, key, value)

Expand source code Browse git

def setattribute(self, key, value):
    """Alias for set_attribute(key, value)"""
    updated = self.set_attribute(key, value)
    return updated

def setattributes(self, newattr)

Set many attributes at once by providing a dictionary to be merged with current attributes

Expand source code Browse git

def setattributes(self, newattr):
    """Set many attributes at once by providing a dictionary to be merged with current attributes.

    Args:
        newattr: [dict] the (key,value) pairs to merge.  Existing keys are overwritten.

    Returns:
        This object with the attributes merged.  If there are no attributes yet (None), a new
        dictionary is created from a copy of newattr so the caller's dictionary is not aliased.
    """
    assert isinstance(newattr, dict), "New attributes must be dictionary"
    if self.attributes is None:
        # attributes may be unset (None), in which case there is nothing to update in place
        self.attributes = dict(newattr)
    else:
        self.attributes.update(newattr)
    return self

def shape(self)

Return the (height, width) or equivalently (rows, cols) of the image.

Returns

A tuple (height=int, width=int) of the image.

Note: This triggers a Image.load() if the image is not already loaded.

Expand source code Browse git

def shape(self):
    """Return the image size as (height, width), equivalently (rows, cols).

    Returns:
        A tuple (height, width) as reported by height() and width().

    .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
    """
    loaded = self.load()
    rows = loaded.height()
    cols = self.width()
    return (rows, cols)

def show(self, figure=1, nowindow=False, timestamp=None, mutator=None, theme='dark')

Display image on screen in provided figure number (clone and convert to RGB colorspace to show), return object

Expand source code Browse git

def show(self, figure=1, nowindow=False, timestamp=None, mutator=None, theme='dark'):
    """Display this image in the given figure number and return this object.

    A clone of the image (optionally passed through mutator) is converted to RGB for display, so this object is not modified.

    Args:
        figure: figure number passed to vipy.show.imshow
        nowindow: forwarded to vipy.show.imshow
        timestamp: forwarded to vipy.show.imshow
        mutator: optional callable applied to the clone before display
        theme: 'dark' selects a white timestamp on black, any other value selects black on white
    """
    assert self.load().isloaded(), 'Image not loaded'
    if theme == 'dark':
        (facecolor, textcolor) = ('black', 'white')
    else:
        (facecolor, textcolor) = ('white', 'black')

    im = self.clone()
    if mutator:
        im = mutator(im)
    vipy.show.imshow(im.rgb().numpy(), fignum=figure, nowindow=nowindow, timestamp=timestamp, timestampfacecolor=facecolor, flush=True, timestampcolor=textcolor)
    return self

def splat(self, im, bb)

Replace pixels within boundingbox in self with pixels in im

Expand source code Browse git

def splat(self, im, bb):
    """Overwrite the pixels of this image inside bounding box bb with pixels from im and return this object.

    If im has exactly the size of bb, all of im is written into the box.  Otherwise the same box region is read out of im.

    Args:
        im: [`vipy.image.Image`] the source of the replacement pixels
        bb: the bounding box, which must provide xywh(), width(), height() and isinterior()
    """
    assert isinstance(im, vipy.image.Image), "invalid image"
    same_size = (im.width() == bb.width() and im.height() == bb.height())
    # NOTE(review): 'and' binds tighter than 'or', so the interior checks are skipped when im matches the box size - confirm this is intended
    assert same_size or (bb.isinterior(im.width(), im.height()) and bb.isinterior(self.width(), self.height())), "Invalid bounding box '%s'" % str(bb)
    (x,y,w,h) = bb.xywh()
    (rows, cols) = (slice(int(y), int(y+h)), slice(int(x), int(x+w)))
    if same_size:
        patch = im.array()
    else:
        patch = im.array()[rows, cols]
    self._array[rows, cols] = patch
    return self

def stats(self)

Expand source code Browse git

def stats(self):
    """Log a summary of this image: the object itself, channels, shape, min, max, mean and per-channel mean"""
    log.info(self)
    log.info('  Channels: %d' % self.channels())
    # Remaining lines are all string-formatted, computed and logged one at a time in this order
    report = (('  Shape: %s', self.shape),
              ('  min: %s', self.min),
              ('  max: %s', self.max),
              ('  mean: %s', self.mean),
              ('  channel mean: %s', self.meanchannel))
    for (fmt, statistic) in report:
        log.info(fmt % str(statistic()))

def store(self)

Store the current image file as an attribute of this object. Useful for archiving an object to be fully self contained without any external references.

-Remove this stored image using unstore() -Unpack this stored image and set up the filename using restore() -This method is more efficient than load() followed by pkl(), as it stores the encoded image as a byte string. -Useful for creating a single self contained object for distributed processing.

v == v.store().restore(v.filename())

Expand source code Browse git

def store(self):
    """Store the encoded bytes of the current image file in the '__image__' attribute of this object.

    Useful for archiving an object to be fully self contained without any external references.

       - Remove this stored image using unstore()
       - Unpack this stored image and set up the filename using restore()
       - This method is more efficient than load() followed by pkl(), as it stores the encoded image as a byte string.
       - Useful for creating a single self contained object for distributed processing.

    ```python
    v == v.store().restore(v.filename())
    ```

    Returns:
        This object, with self.attributes['__image__'] set to the raw bytes of the file.
    """
    assert self.hasfilename(), "Image file not found"
    with open(self.filename(), 'rb') as f:
        encoded = f.read()
    if self.attributes is None:
        # attributes may be unset (None), so create the dictionary before indexing into it
        self.attributes = {}
    self.attributes['__image__'] = encoded
    return self

def sum(self)

Expand source code Browse git

def sum(self):
    """Return the sum over all elements of the pixel buffer (triggers load())"""
    flat = self.load().array().flatten()
    return np.sum(flat)

def sum_to_one(self, eps=1e-06)

Return float image in the range [0,1] such that all elements sum to one

Expand source code Browse git

def sum_to_one(self, eps=1E-6):
    """Return float image in the range [0,1] such that all elements sum to one.

    Args:
        eps: small constant added to the sum so the gain stays finite when the sum is zero
    """
    total = self.mat2gray().sum()
    scale = 1.0 / (eps + total)
    return self.gain(scale)

def tile(self, tilewidth, tileheight, overlaprows=0, overlapcols=0)

Generate an image tiling.

A tiling is a decomposition of an image into overlapping or non-overlapping rectangular regions.

Args

tilewidth: [int] the image width of each tile
tileheight: [int] the image height of each tile
overlaprows: [int] the number of overlapping rows (height) for each tile
overlapcols: [int] the number of overlapping columns (width) for each tile

Returns

A list of Image objects such that each image is a single tile and the set of these tiles forms the original image. Each image in the returned list contains the 'tile' attribute which encodes the crop used to create the tile.

Note

Image.tile() can be undone using Image.untile()
The identity tiling is im.tile(im.width(), im.height(), overlaprows=0, overlapcols=0)
Ragged tiles outside the image boundary are zero padded
All annotations are updated properly for each tile, when the source image is Scene

Expand source code Browse git

def tile(self, tilewidth, tileheight, overlaprows=0, overlapcols=0):
    """Decompose this image into a list of rectangular tiles.

    A tiling is a decomposition of an image into overlapping or non-overlapping rectangular regions.

    Args:
        tilewidth: [int] the image width of each tile
        tileheight: [int] the image height of each tile
        overlaprows: [int] the number of overlapping rows (height) for each tile
        overlapcols: [int] the number of overlapping columns (width) for each tile

    Returns:
        A list of `vipy.image.Image` objects, one per tile, ordered by column offset and then by row offset.
        Each tile is a shallow clone of this image cropped to the tile box, carrying a 'tile' attribute
        {'crop': box, 'shape': shape of this image} that records how the tile was created.

    .. note::
        - `vipy.image.Image.tile` can be undone using `vipy.image.Image.untile`
        - The identity tiling is im.tile(im.width(), im.height(), overlaprows=0, overlapcols=0)
        - Tile boxes at the right and bottom edges are clipped so that they do not extend past the image
        - Annotation handling depends on crop() of the source image class
    """
    (imwidth, imheight) = (self.width(), self.height())
    assert tilewidth > 0 and tileheight > 0 and overlaprows >= 0 and overlapcols >= 0, "Invalid input"
    assert imwidth >= tilewidth-overlapcols and imheight >= tileheight-overlaprows, "Invalid input"

    # First enumerate the tile boxes, column offsets in the outer loop and row offsets in the inner loop
    bboxes = []
    for xmin in range(0, imwidth-overlapcols, tilewidth-overlapcols):
        for ymin in range(0, imheight-overlaprows, tileheight-overlaprows):
            bboxes.append(BoundingBox(xmin=xmin, ymin=ymin, width=min(tilewidth, imwidth-xmin), height=min(tileheight, imheight-ymin)))

    # Then crop one shallow clone per box
    tiles = []
    for bb in bboxes:
        im = self.clone(shallow=True, attributes=True)
        im = im.setattribute('tile', {'crop':bb, 'shape':self.shape()})
        tiles.append(im.crop(bb))
    return tiles

def tocache(self)

Save current buffer to temp JPEG filename in the VIPY cache and return filename.

Expand source code Browse git

def tocache(self):
    """Write the current buffer to a temporary JPEG filename mapped into the VIPY cache by vipy.util.tocache and return the filename."""
    cachefile = vipy.util.tocache(tempjpg())
    return self.saveas(cachefile)

def tonumpy(self)

Alias for `vipy.image.Image.numpy`

Expand source code Browse git

def tonumpy(self):
    """Alias for `vipy.image.Image.numpy`"""
    pixels = self.numpy()
    return pixels

def torch(self, order='CHW')

Convert the batch of 1 HxWxC images to a CxHxW torch tensor.

Args

order: ['CHW', 'HWC', 'NCHW', 'NHWC']. The axis order of the torch tensor (channels, height, width) or (height, width, channels) or (1, channels, height, width) or (1, height, width, channels)

Returns

A CxHxW or HxWxC or 1xCxHxW or 1xHxWxC torch tensor that shares the pixel buffer of this image object by reference.

Note: This supports numpy types and does not support bfloat16

Expand source code Browse git

def torch(self, order='CHW'):
    """Convert the pixel buffer of this image to a torch tensor with the requested axis order.

    Args:
        order: ['CHW', 'HWC', 'NCHW', 'NHWC'].  The axis order of the torch tensor (channels, height, width) or (height, width, channels) or (1, channels, height, width) or (1, height, width, channels)

    Returns:
        A CxHxW or HxWxC or 1xCxHxW or 1xHxWxC [torch tensor](https://pytorch.org/docs/stable/tensors.html) created with torch.from_numpy.
        A two dimensional (HxW) buffer is first expanded to HxWx1.

    .. note:: This supports numpy types and does not support bfloat16
    """
    from torch import from_numpy  # optional package pytorch not installed, run "pip install torch" (don't use try_import here, it's too slow)

    assert order in ['CHW', 'HWC', 'NCHW', 'NHWC']
    if self.array().ndim >= 3:
        img = self.numpy()
    else:
        img = np.expand_dims(self.array(), 2)  # HxW -> HxWx1

    if order == 'CHW':
        assert img.ndim == 3, "invalid array"
        img = img.transpose(2,0,1)  # HxWxC -> CxHxW
    elif order == 'NCHW':
        if img.ndim == 4:
            img = img.transpose(3,2,0,1)
        else:
            img = np.expand_dims(img.transpose(2,0,1), 0)  # HxWxC -> 1xCxHxW
    elif order == 'NHWC':
        if img.ndim == 4:
            img = img.transpose(3,0,1,2)
        else:
            img = np.expand_dims(img, 0)  # HxWxC -> 1xHxWxC
    # 'HWC' needs no reordering
    return from_numpy(img)   # pip install torch

def try_download(self, timeout=10, verbose=False)

Attempt to download URL to filename if the filename has not already been downloaded, return object on failure. Check Image.is_downloaded() on returned object for success

Expand source code Browse git

def try_download(self, timeout=10, verbose=False):
    """Attempt to download URL to filename if the filename has not already been downloaded, return object on failure.

    Check `vipy.image.Image.is_downloaded` on returned object for success.

    Args:
        timeout: forwarded to downloadif()
        verbose: forwarded to downloadif()

    Returns:
        The result of downloadif() on success, or this object unchanged if downloadif() raised an exception.
    """
    try:
        return self.downloadif(timeout=timeout, verbose=verbose)
    except Exception:
        # Best effort: swallow download errors, but let KeyboardInterrupt and SystemExit propagate
        return self

def try_load(self)

Attempt to load an image, return the object on failure. Check Image.is_loaded() on returned object for success

Expand source code Browse git

def try_load(self):
    """Attempt to load an image, return the object on failure.

    Check `vipy.image.Image.is_loaded` on returned object for success.

    Returns:
        The result of load() on success, or this object unchanged if load() raised an exception.
    """
    try:
        return self.load()
    except Exception:
        # Best effort: swallow load errors, but let KeyboardInterrupt and SystemExit propagate
        return self

def uncache(self)

Alias for Image.unload()

Expand source code Browse git

def uncache(self):
    """Alias for `vipy.image.Image.unload`"""
    unloaded = self.unload()
    return unloaded

def uncrop(self, bb, shape)

Uncrop using provided bounding box and zeropad to shape=(Height, Width).

An uncrop is the inverse operation for a crop, which preserves the cropped portion of the image in the correct location and replaces the rest with zeros out to shape.

im = vipy.image.RandomImage(128, 128)
bb = vipy.geometry.BoundingBox(xmin=0, ymin=0, width=64, height=64)
uncrop = im.crop(bb).uncrop(bb, shape=(128,128))

Args

bb: [vipy.geometry.BoundingBox] the bounding box used to crop the image in self
shape: [tuple] (height, width) of the uncropped image

Returns

this Image object with the pixels uncropped.

Note: NOT idempotent. This will generate different results if run more than once.

Expand source code Browse git

def uncrop(self, bb, shape):
    """Undo a crop by zero padding the current pixels back out to shape=(Height, Width).

    The current pixels are kept at the location given by the bounding box and the remainder of the
    uncropped image is filled with zeros.

    ```python
    im = vipy.image.RandomImage(128, 128)
    bb = vipy.geometry.BoundingBox(xmin=0, ymin=0, width=64, height=64)
    uncrop = im.crop(bb).uncrop(bb, shape=(128,128))
    ```

    Args:
        bb: [`vipy.geometry.BoundingBox`] the bounding box used to crop the image in self
        shape: [tuple] (height, width) of the uncropped image

    Returns:
        this `vipy.image.Image` object with the pixels uncropped.

    .. note:: NOT idempotent.  This will generate different results if run more than once.
    """
    (x, y, w, h) = bb.xywh()
    (H, W) = shape
    rowpad = (int(y), int(H-(y+h)))  # rows added (above, below)
    colpad = (int(x), int(W-(x+w)))  # columns added (left, right)

    img = self.load().array()
    if img.ndim == 3:
        padding = (rowpad, colpad, (0, 0))  # never pad the channel axis
    else:
        padding = (rowpad, colpad)
    self._array = np.pad(img, padding, mode='constant')
    return self

def union(self, other)

No-op for Image

Expand source code Browse git

def union(self, other):
    """No-op for `vipy.image.Image`: the other argument is ignored and this object is returned unchanged"""
    return self

def unload(self)

Remove cached file and loaded array. Note that this will delete the underlying file returned by filename() if there is a backing url, cleaning up cached files and forcing re-download

Expand source code Browse git

def unload(self):
    """Discard the cached file and the loaded pixel buffer and return this object.

    If there is a backing url and a filename, the file returned by filename() is deleted from disk
    and the filename is cleared, which cleans up cached files and forces a re-download.
    If the image is loaded, it is flushed.
    """
    has_cached_file = self.hasurl() and self.hasfilename()
    if has_cached_file:
        log.info('Removing "%s"'% self._filename)
        os.remove(self._filename)
        self._filename = None

    if self.isloaded():
        self.flush()
    return self

def unstore(self)

Delete the currently stored image from store()

Expand source code Browse git

def unstore(self):
    """Delete the image bytes stored by store(), held in the '__image__' attribute"""
    removed = self.delattribute('__image__')
    return removed

def uri(self)

Return the URI of the image object, either the URL or the filename, raise exception if neither defined

Expand source code Browse git

def uri(self):
    """Return the URI of this image: the URL if there is one, otherwise the filename.

    Raises:
        ValueError: if neither a URL nor a filename is defined
    """
    if self.hasurl():
        return self.url()
    if self.hasfilename():
        return self.filename()
    raise ValueError('No URI defined')

def url(self, url=None, username=None, password=None, sha1=None)

Image URL and URL download properties

Expand source code Browse git

def url(self, url=None, username=None, password=None, sha1=None):
    """Get the image URL, or set the URL and its download properties.

    Args:
        url: if not None, the new URL.  This does not change anything else (e.g. the associated filename), better to use constructor.
        username: if not None, stored in the 'url_username' attribute
        password: if not None, stored in the 'url_password' attribute
        sha1: if not None, stored in the 'url_sha1' attribute

    Returns:
        The current URL if called with no arguments, otherwise this object after the update.
    """
    if url is None and username is None and password is None and sha1 is None:
        return self._url  # getter

    if url is not None:
        self._url = url
    for (key, val) in (('url_username', username), ('url_password', password), ('url_sha1', sha1)):
        if val is not None:
            self.setattribute(key, val)
    return self

def viewport(self)

Return the bounding box of the current loaded pixels in the original filename/url/buffer.

This reverses the chain of geometric transformations applied to the original image to recover the bounding box of the pixels in array().

This is useful to specify a region of a larger image that was zoomed in for processing.

To show this viewport as a bounding box:

>>> im = vipy.image.vehicles().centercrop(100,100)
>>> viewport = vipy.object.Detection.cast(im.viewport())
>>> im.flush().append(viewport).show()

Expand source code Browse git

def viewport(self):
    """Return the bounding box of the currently loaded pixels relative to the original filename/url/buffer.

    The image box of the current pixels is passed back through the recorded history of geometric
    transformations, last transformation first, to recover where these pixels came from in the original image.

    This is useful to specify a region of a larger image that was zoomed in for processing.

    To show this viewport as a bounding box:

    >>> im = vipy.image.vehicles().centercrop(100,100)
    >>> viewport = vipy.object.Detection.cast(im.viewport())
    >>> im.flush().append(viewport).show()
    """
    bb = self.imagebox()
    history = self._history()
    if history is None:
        return bb
    for (method, kwargs) in reversed(history):
        # each history entry names a bounding box method and the keyword arguments to call it with
        transform = getattr(bb, method)
        transform(**kwargs)
    return bb

def width(self)

Return the width (columns) of the image in integer pixels.

Note: This triggers a Image.load() if the image is not already loaded.

Expand source code Browse git

def width(self):
    """Return the width of the image, which is the number of columns of the pixel buffer.

    .. note:: This triggers a `vipy.image.Image.load` if the image is not already loaded.
    """
    dims = self.load().array().shape
    return dims[1]

def zeropad(self, padwidth, padheight)

Pad image using np.pad constant by adding padwidth on both left and right, or padwidth=(left,right) for different pre/post padding, and padheight on top and bottom, or padheight=(top,bottom) for different pre/post padding

Expand source code Browse git

def zeropad(self, padwidth, padheight):
    """Zero pad the pixel buffer using np.pad in constant mode and return this object.

    Args:
        padwidth: columns added on both the left and right, or a tuple (left, right) for different pre/post padding
        padheight: rows added on both the top and bottom, or a tuple (top, bottom) for different pre/post padding

    Returns:
        This object.  The pixel buffer is only replaced if at least one padding amount is greater than zero.
    """
    padwidth = padwidth if isinstance(padwidth, tuple) else (padwidth, padwidth)
    padheight = padheight if isinstance(padheight, tuple) else (padheight, padheight)

    has_channel_axis = self.channels() > 1 or self._array.ndim == 3
    if has_channel_axis:
        pad_shape = (padheight, padwidth, (0, 0))  # never pad the channel axis
    else:
        pad_shape = (padheight, padwidth)

    assert all(x >= 0 for x in padheight) and all(x >= 0 for x in padwidth), "padding must be positive"
    needs_padding = padwidth[0] > 0 or padwidth[1] > 0 or padheight[0] > 0 or padheight[1] > 0
    if not needs_padding:
        return self

    # this is still slow due to the required copy, but fast-ish in np >= 1.17
    self._array = np.pad(self.load().array(), pad_width=pad_shape, mode='constant', constant_values=0)
    return self

def zeropadlike(self, width, height)

Zero pad the image balancing the border so that the resulting image size is (width, height)

Expand source code Browse git

def zeropadlike(self, width, height):
    """Zero pad the image so that the resulting image size is (width, height), balancing the border on opposite sides.

    When the padding on an axis is odd, the extra pixel goes to the right (for width) or to the bottom (for height).
    """
    assert width >= self.width() and height >= self.height(), "Invalid input - final (width=%d, height=%d) must be greater than current image size (width=%d, height=%d)" % (width, height, self.width(), self.height())
    dw = width - self.width()
    dh = height - self.height()
    (left, right) = (int(np.floor(dw/2)), int(np.ceil(dw/2)))
    (top, bottom) = (int(np.floor(dh/2)), int(np.ceil(dh/2)))
    return self.zeropad((left, right), (top, bottom))

def zeros(self)

Set the pixel buffer to all zeros of the same shape and datatype as this Image object.

These are equivalent operations for the resulting buffer shape:

import numpy as np
np.zeros( (self.height(), self.width(), self.channels()) ) == self.zeros().array()

Returns

This Image object.

Note: Triggers load() if the pixel buffer has not been loaded yet.

Expand source code Browse git

def zeros(self):
    """Set the pixel buffer to all zeros of the same shape and datatype as this `vipy.image.Image` object.

    These are equivalent operations for the resulting buffer shape:

    ```python
    import numpy as np
    np.zeros( (self.height(), self.width(), self.channels()) ) == self.zeros().array()
    ```

    Returns:
       This `vipy.image.Image` object.

    .. note:: Triggers load() if the pixel buffer has not been loaded yet.
    """
    # zeros_like rather than 0*array: multiplying by zero leaves NaN (and turns Inf into NaN) in float buffers
    self._array = np.zeros_like(self.load()._array)
    return self

class ImageCategory (filename=None, url=None, category=None, label=None, attributes=None, array=None, colorspace=None, confidence=None)

vipy ImageCategory class

This class provides a representation of a vipy.image.Image with a category label.

Valid constructors include all provided by vipy.image.Image with the additional kwarg 'category' (or alias 'label') and optional confidence

im = vipy.image.ImageCategory(filename='/path/to/dog_image.ext', category='dog')
im = vipy.image.ImageCategory(url='http://path/to/dog_image.ext', category='dog')
im = vipy.image.ImageCategory(array=dog_img, colorspace='rgb', category='dog')

Expand source code Browse git

class ImageCategory(Labeled):
    """vipy ImageCategory class

    This class provides a representation of a vipy.image.Image with a category label. 

    Valid constructors include all provided by vipy.image.Image with the additional kwarg 'category' (or alias 'label') and optional confidence

    ```python
    im = vipy.image.ImageCategory(filename='/path/to/dog_image.ext', category='dog')
    im = vipy.image.ImageCategory(url='http://path/to/dog_image.ext', category='dog')
    im = vipy.image.ImageCategory(array=dog_img, colorspace='rgb', category='dog')
    ```

    The category and the optional confidence are stored in the attributes dictionary under the keys 'category' and 'confidence'.
    """

    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')    
    def __init__(self, filename=None, url=None, category=None, label=None, attributes=None, array=None, colorspace=None, confidence=None):
        # Image class inheritance
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)

        # 'label' is the documented alias for 'category', an explicit category takes precedence
        category = category if category is not None else label

        # An explicit category always wins.  Without one, keep a category already carried in the attributes
        # (e.g. when constructed by from_json) rather than overwriting it with None, but still guarantee the key exists.
        if category is not None or not isinstance(self.attributes, dict) or 'category' not in self.attributes:
            self.set_attribute('category', category)
        if confidence is not None:
            self.set_attribute('confidence', float(confidence))

    def __repr__(self):
        fields = ['category=%s' % str(self.category())]
        fields +=  ['confidence=%1.3f' % self.confidence()] if self.confidence() is not None else []
        return super().__repr__().replace('vipy.image.Image', 'vipy.image.ImageCategory').replace('>', ', %s>' % ','.join(fields))

    def __eq__(self, other):
        """Two ImageCategory objects are equal if their categories are equal, pixels are not compared"""
        return self.category() == other.category() if isinstance(other, ImageCategory) else False

    def __ne__(self, other):
        return self.category() != other.category() if isinstance(other, ImageCategory) else True

    @classmethod
    def from_json(cls, s):
        """Construct an object of this class from a JSON string, or from an already decoded dictionary.

        The keys 'filename', 'url', 'attributes', 'colorspace' and 'array' are read if present.
        The category and confidence are not passed explicitly, they are expected to be in 'attributes'.
        """
        d = json.loads(s) if not isinstance(s, dict) else s
        return cls(filename=d['filename'] if 'filename' in d else None,
                   url=d['url'] if 'url' in d else None,
                   category=None,  # will be in attributes
                   confidence=None,  # will be in attributes
                   attributes=d['attributes'] if 'attributes' in d else None,
                   colorspace=d['colorspace'] if 'colorspace' in d else None,
                   array=np.array(d['array'], dtype=np.uint8) if 'array' in d and d['array'] is not None else None)
    
    def new_category(self, c):
        """Replace the category with c and return this object"""
        return self.set_attribute('category', c)

    def clear_category(self):
        """Remove the 'category' key from the attributes if present and return this object"""
        if 'category' in self.attributes:
            del self.attributes['category']
        return self
    
    def category(self):
        """Return the category, or None if there is no 'category' attribute"""
        return self.attributes['category'] if 'category' in self.attributes else None  # self.attributes.get('category') 

    def confidence(self):
        """Return the value of the 'confidence' attribute as given by get_attribute()"""
        return self.get_attribute('confidence')        

    def tags(self, tags=None):
        """Return the category as a tuple of at most one tag, or set the category to the first of the provided tags"""
        if tags is not None:
            return self.set_attribute('category', tolist(tags)[0])                
        return (self.category(), ) if self.category() is not None else ()

Instance variables

var attributes

Expand source code Browse git

class ImageCategory(Labeled):
    """vipy ImageCategory class

    This class provides a representation of a vipy.image.Image with a category label. 

    Valid constructors include all provided by vipy.image.Image with the additional kwarg 'category' (or alias 'label') and optional confidence

    ```python
    im = vipy.image.ImageCategory(filename='/path/to/dog_image.ext', category='dog')
    im = vipy.image.ImageCategory(url='http://path/to/dog_image.ext', category='dog')
    im = vipy.image.ImageCategory(array=dog_img, colorspace='rgb', category='dog')
    ```

    The category and the optional confidence are stored in the attributes dictionary under the keys 'category' and 'confidence'.
    """

    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')    
    def __init__(self, filename=None, url=None, category=None, label=None, attributes=None, array=None, colorspace=None, confidence=None):
        # Image class inheritance
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)

        # 'label' is the documented alias for 'category', an explicit category takes precedence
        category = category if category is not None else label

        # An explicit category always wins.  Without one, keep a category already carried in the attributes
        # (e.g. when constructed by from_json) rather than overwriting it with None, but still guarantee the key exists.
        if category is not None or not isinstance(self.attributes, dict) or 'category' not in self.attributes:
            self.set_attribute('category', category)
        if confidence is not None:
            self.set_attribute('confidence', float(confidence))

    def __repr__(self):
        fields = ['category=%s' % str(self.category())]
        fields +=  ['confidence=%1.3f' % self.confidence()] if self.confidence() is not None else []
        return super().__repr__().replace('vipy.image.Image', 'vipy.image.ImageCategory').replace('>', ', %s>' % ','.join(fields))

    def __eq__(self, other):
        """Two ImageCategory objects are equal if their categories are equal, pixels are not compared"""
        return self.category() == other.category() if isinstance(other, ImageCategory) else False

    def __ne__(self, other):
        return self.category() != other.category() if isinstance(other, ImageCategory) else True

    @classmethod
    def from_json(cls, s):
        """Construct an object of this class from a JSON string, or from an already decoded dictionary.

        The keys 'filename', 'url', 'attributes', 'colorspace' and 'array' are read if present.
        The category and confidence are not passed explicitly, they are expected to be in 'attributes'.
        """
        d = json.loads(s) if not isinstance(s, dict) else s
        return cls(filename=d['filename'] if 'filename' in d else None,
                   url=d['url'] if 'url' in d else None,
                   category=None,  # will be in attributes
                   confidence=None,  # will be in attributes
                   attributes=d['attributes'] if 'attributes' in d else None,
                   colorspace=d['colorspace'] if 'colorspace' in d else None,
                   array=np.array(d['array'], dtype=np.uint8) if 'array' in d and d['array'] is not None else None)
    
    def new_category(self, c):
        """Replace the category with c and return this object"""
        return self.set_attribute('category', c)

    def clear_category(self):
        """Remove the 'category' key from the attributes if present and return this object"""
        if 'category' in self.attributes:
            del self.attributes['category']
        return self
    
    def category(self):
        """Return the category, or None if there is no 'category' attribute"""
        return self.attributes['category'] if 'category' in self.attributes else None  # self.attributes.get('category') 

    def confidence(self):
        """Return the value of the 'confidence' attribute as given by get_attribute()"""
        return self.get_attribute('confidence')        

    def tags(self, tags=None):
        """Return the category as a tuple of at most one tag, or set the category to the first of the provided tags"""
        if tags is not None:
            return self.set_attribute('category', tolist(tags)[0])                
        return (self.category(), ) if self.category() is not None else ()

Methods

def category(self)

Expand source code Browse git

def category(self):
    """Return the value of the 'category' attribute, or None if there is no such key"""
    if 'category' in self.attributes:
        return self.attributes['category']
    return None

def clear_category(self)

Expand source code Browse git

def clear_category(self):
    """Remove the 'category' key from the attributes if it is present and return this object"""
    has_category = 'category' in self.attributes
    if has_category:
        del self.attributes['category']
    return self

def confidence(self)

Expand source code Browse git

def confidence(self):
    """Return the value of the 'confidence' attribute as given by get_attribute()"""
    conf = self.get_attribute('confidence')
    return conf

def new_category(self, c)

Expand source code Browse git

def new_category(self, c):
    """Set the 'category' attribute to c and return the result of set_attribute()"""
    updated = self.set_attribute('category', c)
    return updated

def tags(self, tags=None)

Expand source code Browse git

def tags(self, tags=None):
    """Get the category as a tuple of tags, or set the category from the provided tags.

    Args:
        tags: if not None, the first element of tolist(tags) becomes the new category

    Returns:
        With no argument, a one element tuple holding the category, or an empty tuple if the category is None.
        With tags, the result of set_attribute().
    """
    if tags is None:
        label = self.category()
        return () if label is None else (label, )
    return self.set_attribute('category', tolist(tags)[0])

Inherited members

Labeled:
- PIL_loader
- abspath
- additive_noise
- affine_transform
- alpha
- alphapad
- annotate
- append_attribute
- area
- array
- ascii
- aspectratio
- base64
- bgr
- bgra
- bias
- blend
- blue
- blur
- bone
- border_mask
- bytes_array_loader
- canload
- cast
- centercrop
- centerpixel
- centersquare
- centroid
- channel
- channelmean
- channels
- clear_filename
- clone
- close
- closeall
- color_transform
- colorspace
- colorspace_like
- cornercrop
- dict
- download
- downloadif
- exif
- face_blur
- face_detection
- face_pixelize
- filename
- filesize
- fliplr
- flipud
- float
- flush
- from_json
- from_torch
- from_uri
- fromarray
- fromtorch
- gain
- get_attribute
- getattribute
- gray
- grayscale
- green
- grey
- greyscale
- has_filename
- has_url
- hasfilename
- hasurl
- height
- hot
- hsv
- html
- imagebox
- intensity
- is_downloaded
- is_loaded
- iscolor
- isdownloaded
- isgrey
- isloaded
- isluminance
- istransparent
- jet
- load
- loaded
- loader
- lum
- luminance
- map
- mat2gray
- maxdim
- maxmatte
- maxsquare
- mean
- meanchannel
- meanpad
- metadata
- mindim
- mindimn
- minsquare
- normalize
- numpy
- pad
- padcrop
- perceptualhash
- perceptualhash_distance
- person_detection
- pil
- pkl
- pklif
- print
- rainbow
- recenter
- red
- reload
- relpath
- rescale
- resize
- resize_like
- restore
- rgb
- rgba
- rot90ccw
- rot90cw
- rotate
- rotate_by_exif
- sanitize
- saturate
- save
- saveas
- saveastmp
- savefig
- savetmp
- set_attribute
- setattributes
- shape
- show
- splat
- store
- sum_to_one
- tile
- tocache
- tonumpy
- torch
- try_download
- try_load
- uncache
- uncrop
- union
- unload
- unstore
- untile
- uri
- url
- viewport
- width
- zeropad
- zeropadlike
- zeros

class ImageDetection (filename=None, url=None, attributes=None, colorspace=None, array=None, xmin=None, xmax=None, ymin=None, ymax=None, width=None, height=None, xcentroid=None, ycentroid=None, category=None, xywh=None, ulbr=None, bbox=None, id=True)

vipy.image.ImageDetection class

This class provides a representation of a Image with a single Detection. This is useful for direct bounding box manipulations.

This class inherits all methods of Image and Detection (and therefore BoundingBox).

Inheritance priority is for Image. Overloaded methods such as rescale() or width() will transform or return values for the Image.

Valid constructors include all provided by vipy.image.Image and BoundingBox coordinates

im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xmin=0, ymin=0, width=100, height=100)
im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xmin=0, ymin=0, xmax=100, ymax=100)
im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xcentroid=50, ycentroid=50, width=100, height=100)

Notes

The inheritance resolution order will prefer the subclass methods for Image. For example, the shape() method will return the image shape.
Use vipy.image.DetectionImage or vipy.image.ImageDetection.detectionimage cast if you prefer overloaded methods to resolve to bounding box manipulation.
All methods in this class will transform the pixels or the box independently. The use case for this class is to manipulate boxes relative to the image for refinement (e.g. data augmentation).
If you want the pixels to be transformed along with the boxes, use the vipy.image.ImageDetection.scene method to cast this to a Scene object.

Expand source code Browse git

class ImageDetection(Scene):
    """vipy.image.ImageDetection class

    This class provides a representation of a `vipy.image.Image` with a single `vipy.object.Detection`.  This is useful for direct bounding box manipulations.

    This class inherits all methods of `vipy.image.Image` and `vipy.object.Detection` (and therefore `vipy.geometry.BoundingBox`).  

    Inheritance priority is for Image.  Overloaded methods such as rescale() or width() will transform or return values for the Image.

    Valid constructors include all provided by vipy.image.Image and BoundingBox coordinates

    ```python
    im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xmin=0, ymin=0, width=100, height=100)
    im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xmin=0, ymin=0, xmax=100, ymax=100)
    im = vipy.image.ImageDetection(filename='/path/to/dog_image.ext', category='dog', xcentroid=50, ycentroid=50, width=100, height=100)
    ```

    .. notes::
        - The inheritance resolution order will prefer the subclass methods for `vipy.image.Image`.  For example, the shape() method will return the image shape.
        - Use `vipy.image.DetectionImage` or `vipy.image.ImageDetection.detectionimage` cast if you prefer overloaded methods to resolve to bounding box manipulation.
        - All methods in this class will transform the pixels or the box independently.  The use case for this class is to manipulate boxes relative to the image for refinement (e.g. data augmentation).
        - If you want the pixels to be transformed along with the boxes, use the `vipy.image.ImageDetection.scene` method to cast this to a `vipy.image.Scene` object.
    """
    
    def __init__(self, filename=None, url=None, attributes=None, colorspace=None, array=None, 
                 xmin=None, xmax=None, ymin=None, ymax=None, width=None, height=None, 
                 xcentroid=None, ycentroid=None, category=None, xywh=None, ulbr=None, bbox=None, id=True):
        """Construct the image, then add a single `vipy.object.Detection` built from the box arguments.

        The image arguments (filename, url, attributes, array, colorspace) are forwarded to the parent constructor.
        The box arguments are forwarded to `vipy.object.Detection`.  `bbox` is only consulted when `xywh` is None,
        and only if it is a `vipy.geometry.BoundingBox`, in which case its xywh() is used.
        """
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)
        
        self.add_object(vipy.object.Detection(xmin=xmin,
                                              ymin=ymin,
                                              width=width,
                                              height=height,
                                              xmax=xmax,
                                              ymax=ymax,
                                              xcentroid=xcentroid,
                                              ycentroid=ycentroid,
                                              xywh=xywh if xywh is not None else (bbox.xywh() if isinstance(bbox, BoundingBox) else None),
                                              ulbr=ulbr,
                                              category=category,
                                              attributes=attributes,
                                              id=id))
        
    def __repr__(self):
        return str('<vipy.image.ImageDetection: %s, %s>' % (super().__repr__(), self._objectlist[0].__repr__()))
        
    def __eq__(self, other):
        """ImageDetection equality is defined as equal bounding boxes (pixels are not compared).  Any other type compares unequal"""
        return self.boundingbox() == other.boundingbox() if isinstance(other, ImageDetection) else False

    def num_objects(self):
        """An ImageDetection always reports exactly one object"""
        return 1
    
    @classmethod
    def from_json(cls, s):
        """Import an ImageDetection from a JSON string or an already decoded dictionary.

        Args:
            s: a JSON encoded string, or a dict.  Missing keys are passed to the constructor as None.

        Returns:
            A new instance of this class
        """
        # The first argument must be named cls: the body constructs the instance with cls(...)
        d = json.loads(s) if not isinstance(s, dict) else s
        return cls(filename=d.get('filename'),
                   url=d.get('url'),
                   category=d.get('category'),
                   attributes=d.get('attributes'),
                   colorspace=d.get('colorspace'),
                   array=np.array(d['array'], dtype=np.uint8) if d.get('array') is not None else None,
                   xmin=d.get('xmin'),
                   ymin=d.get('ymin'),
                   xmax=d.get('xmax'),
                   ymax=d.get('ymax'),
                   id=d.get('id'))

    def boundingbox(self):
        """Return a new `vipy.geometry.BoundingBox` built from the ulbr() of the single object"""
        return vipy.geometry.BoundingBox(ulbr=self._objectlist[0].ulbr())

    def crop(self):
        """Crop the image using the bounding box and return a `vipy.image.Image` for the cropped pixels"""
        return vipy.image.Image.cast(self.clone())._crop(self.boundingbox())

Ancestors

Scene
TaggedImage
Labeled
Image

Methods

def crop(self)

Crop the image using the bounding box and return a Image for the cropped pixels

Expand source code Browse git

def crop(self):
    """Return a `vipy.image.Image` holding only the pixels inside the bounding box.

    The crop is applied to a clone that has been cast to `vipy.image.Image`, not to this object.
    """
    im = vipy.image.Image.cast(self.clone())
    return im._crop(self.boundingbox())

def num_objects(self)

Expand source code Browse git

def num_objects(self):
    """Return the number of objects, which is always one for this class"""
    return 1

Inherited members

Scene:
- PIL_loader
- abspath
- add_object
- add_soft_tag
- add_soft_tags
- additive_noise
- affine_transform
- alpha
- alphapad
- annotate
- append_attribute
- append_object
- area
- array
- ascii
- aspectratio
- base64
- bghash
- bgmask
- bgr
- bgra
- bias
- binarymask
- blend
- blue
- blur
- blurmask
- blurmask_only
- bone
- border_mask
- boundingbox
- bytes_array_loader
- canload
- cast
- centercrop
- centerpixel
- centersquare
- centroid
- channel
- channelmean
- channels
- clear
- clear_filename
- clone
- close
- closeall
- color_transform
- colorspace
- colorspace_like
- cornercrop
- cornerpadcrop
- dict
- difference
- dilate
- download
- downloadif
- exif
- face_blur
- face_detection
- face_pixelize
- fghash
- fgmask
- filename
- filesize
- fliplr
- flipud
- float
- flush
- from_json
- from_torch
- from_uri
- fromarray
- fromtorch
- gain
- get_attribute
- getattribute
- gray
- grayscale
- green
- grey
- greyscale
- has_filename
- has_soft_tags
- has_url
- hasfilename
- hasurl
- height
- hot
- hsv
- html
- image_tags
- imagebox
- imclip
- intensity
- intersection
- is_downloaded
- is_loaded
- iscolor
- isdownloaded
- isduplicate
- isgrey
- isloaded
- isluminance
- istransparent
- jet
- load
- loaded
- loader
- lum
- luminance
- map
- mat2gray
- maxdim
- maxmatte
- maxsquare
- mean
- meanchannel
- meanmask
- meanpad
- metadata
- mindim
- mindimn
- minsquare
- nms
- normalize
- numpy
- object_tags
- objectcrop
- objectfilter
- objectmap
- objectsquare
- pad
- padcrop
- perceptualhash
- perceptualhash_distance
- person_detection
- pil
- pixelate
- pixelize
- pixelmask
- pkl
- pklif
- print
- rainbow
- recenter
- rectangular_mask
- red
- reload
- relpath
- replace
- rescale
- resize
- resize_like
- restore
- rgb
- rgba
- rot90ccw
- rot90cw
- rotate
- rotate_by_exif
- sanitize
- saturate
- save
- saveas
- saveastmp
- savefig
- savetmp
- set_attribute
- setattributes
- shape
- show
- soft_tags
- splat
- split
- split_and_recenter
- store
- sum_to_one
- tags
- tile
- tocache
- tonumpy
- torch
- try_download
- try_load
- uncache
- uncrop
- union
- unload
- unstore
- untile
- uri
- url
- viewport
- width
- zeropad
- zeropadlike
- zeros

class Labeled (filename=None, url=None, array=None, colorspace=None, attributes=None)

A labeled image is an image that contains some form of annotation. This class is useful for identifying if an image has any annotation at all or is completely unlabeled.

>>> im = vipy.image.owl()
>>> assert isinstance(im, vipy.image.Labeled)
>>> im = vipy.image.RandomImage()
>>> assert not isinstance(im, vipy.image.Labeled)

The specific form of annotation may be ImageCategory, TaggedImage or Scene, but all are Labeled

Expand source code Browse git

class Labeled(Image):
    """A labeled image is an image that contains some form of annotation.  This class is useful for identifying if an image has any annotation at all or is completely unlabeled.

    >>> im = vipy.image.owl()
    >>> assert isinstance(im, vipy.image.Labeled)
    >>> im = vipy.image.RandomImage()
    >>> assert not isinstance(im, vipy.image.Labeled)    

    The specific form of annotation may be `vipy.image.ImageCategory`, `vipy.image.TaggedImage` or `vipy.image.Scene`, but all are `vipy.image.Labeled` 
    """
    # Marker class only: it adds no behavior of its own and exists for isinstance checks
    pass

class Scene (filename=None, url=None, category=None, attributes=None, objects=None, xywh=None, boxlabels=None, array=None, colorspace=None, tags=None)

vipy.image.Scene class

This class provides a representation of a vipy.image.TaggedImage with one or more vipy.object.Object. The goal of this class is to provide a unified representation for all objects in a scene.

Valid constructors include all provided by vipy.image.Image() and vipy.image.ImageCategory() with the additional kwarg 'objects', which is a list of vipy.object.Object()

im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city', objects=[vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city').objects([vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[0,0,100,100])
im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[[0,0,100,100], [100,100,200,200]])
im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels=['face', 'desk'], xywh=[[0,0,100,100], [200,200,300,300]])

Expand source code Browse git

class Scene(TaggedImage):
    """vipy.image.Scene class

    This class provides a representation of a vipy.image.TaggedImage with one or more vipy.object.Object.  The goal of this class is to provide a unified representation for all objects in a scene.

    Valid constructors include all provided by vipy.image.Image() and vipy.image.ImageCategory() with the additional kwarg 'objects', which is a list of vipy.object.Object()

    ```python
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city', objects=[vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city').objects([vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[0,0,100,100])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[[0,0,100,100], [100,100,200,200]])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels=['face', 'desk'], xywh=[[0,0,100,100], [200,200,300,300]])
    ```

    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes', '_objectlist')
    
    def __init__(self, filename=None, url=None, category=None, attributes=None, objects=None, xywh=None, boxlabels=None, array=None, colorspace=None, tags=None):
        """Construct a scene from image arguments plus an optional list of objects and/or boxes.

        Args:
            filename, url, attributes, tags, category, array, colorspace: forwarded to the parent constructor
            objects: a list of `vipy.object.Object`, or None
            xywh: a single box [x,y,w,h] of numbers, or a list of lists / tuple of tuples of such boxes, or None
            boxlabels: a single string applied to every box in xywh, or a list/tuple with one label per box, or None

        Raises:
            ValueError: if objects is not a list of `vipy.object.Object`, if xywh is malformed, or if
                boxlabels is not a string or a list/tuple with exactly one label per box
        """
        super().__init__(filename=filename, url=url, attributes=attributes, tags=tags, category=category, array=array, colorspace=colorspace)
        self._objectlist = []

        if objects is not None:
            if not (isinstance(objects, list) and all(isinstance(bb, vipy.object.Object) for bb in objects)):
                raise ValueError("Invalid object list - Input must be [vipy.object.Object, ...]")
            self._objectlist = objects

        detlist = []
        if xywh is not None:
            if (islistoflists(xywh) or istupleoftuples(xywh)) and all(len(bb) == 4 for bb in xywh):
                detlist = [vipy.object.Detection(category=None, xywh=bb) for bb in xywh]
            elif (islist(xywh) or isinstance(xywh, tuple)) and len(xywh) == 4 and all(isnumber(bb) for bb in xywh):
                detlist = [vipy.object.Detection(category=None, xywh=xywh)]
            else:
                raise ValueError("Invalid xywh list - Input must be [[x1,y1,w1,h1], ...")
        if boxlabels is not None:
            if isstring(boxlabels):
                detlist = [d.new_category(boxlabels) for d in detlist]
            elif (isinstance(boxlabels, tuple) or islist(boxlabels)) and len(boxlabels) == len(detlist):
                # Compare against the number of boxes actually built, not len(xywh):
                # xywh may be None (len() would raise TypeError) or a single flat box of length four
                detlist = [d.new_category(label) for (d, label) in zip(detlist, boxlabels)]
            else:
                raise ValueError("Invalid boxlabels list - len(boxlabels) must be len(xywh) with corresponding labels for each xywh box  [label1, label2, ...]")

        # Concatenation builds a new list, so the caller's objects list is not extended in place
        self._objectlist = self._objectlist + detlist

        
    @classmethod
    def cast(cls, im):
        """Cast an image to a scene.

        If im is already exactly a `vipy.image.Scene` it is returned as is.  Otherwise a new instance of cls is
        constructed from the filename, url, attributes, array and colorspace of im, and the loader of im is applied to it.
        """
        assert isinstance(im, vipy.image.Image), "Invalid input - must be derived from vipy.image.Image"
        if im.__class__ == vipy.image.Scene:
            return im
        scene = cls(filename=im._filename, url=im._url, attributes=im.attributes, array=im._array, colorspace=im._colorspace)
        return scene.loader(*im._loader)
    
    @classmethod
    def from_json(obj, s):
        """Import a scene from a JSON string or an already decoded dictionary.

        The image is first imported by the parent class from_json, its class is then set to `vipy.image.Scene`,
        and the object list is rebuilt from the 'objectlist' entry.

        Args:
            s: a JSON encoded string, or a dict.  Leading underscores on keys are ignored.

        Returns:
            The imported `vipy.image.Scene`
        """
        im = super().from_json(s)
        im.__class__ = vipy.image.Scene
        d = {k.lstrip('_'):v for (k,v) in (json.loads(s) if not isinstance(s, dict) else s).items()}  # prettyjson (remove "_" prefix to attributes)
        if 'objectlist' in d and isinstance(d['objectlist'], dict):
            # Version 1.15.1: expanded serialization to support multiple object types
            # Detections are restored first, followed by keypoints
            im._objectlist = [vipy.object.Detection.from_json(s) for s in d['objectlist']['Detection']] if 'Detection' in  d['objectlist'] else []
            im._objectlist += [vipy.object.Keypoint2d.from_json(s) for s in d['objectlist']['Keypoint2d']] if 'Keypoint2d' in  d['objectlist'] else []
        else:
            # Legacy support: 1.14.4
            # This branch indexes d['objectlist'] directly, so the key must be present and iterate over detections
            im._objectlist = [vipy.object.Detection.from_json(s) for s in d['objectlist']]            
        return im

    def __json__(self):
        """Serialization method for json package.  Returns the result of self.json(encode=True)"""
        return self.json(encode=True)

    def num_objects(self):
        """Return the number of objects in the scene object list"""
        return len(self._objectlist)
    
    def json(self, encode=True):
        """Export this scene as JSON.

        Args:
            encode: if True return a JSON encoded string, otherwise return the dictionary

        Returns:
            A dictionary (or its JSON encoding) of every slot in Scene.__slots__ that is not None, keyed by slot name with
            leading underscores removed.  The 'objectlist' entry is a dictionary with optional keys 'Detection' and 'Keypoint2d'.
        """
        d = {k.lstrip('_'):getattr(self, k) for k in Scene.__slots__ if getattr(self, k) is not None}  # prettyjson (remove "_" prefix to attributes)          
        # Objects are grouped by type.  Objects that are neither Detection nor Keypoint2d are not exported.
        d['objectlist'] = {'Detection': [bb.json(encode=False) for bb in self._objectlist if isinstance(bb, vipy.object.Detection)],
                           'Keypoint2d': [p.json(encode=False) for p in self._objectlist if isinstance(p, vipy.object.Keypoint2d)]}
        d['objectlist'] = {k:v for (k,v) in  d['objectlist'].items() if len(v) > 0}  # cleanup empty lists
        if 'attributes' in d and len(d['attributes'])==0:  # cleanup empty attributes
            del d['attributes']  # will be recreated in from_json
        if 'array' in d and d['array'] is not None:
            # Warn only when the pixels could instead be reloaded from a backing filename or url
            if self.hasfilename() or self.hasurl():
                log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() or self.save(), then reload the image from backing filename/url after json import')            
            d['array'] = self._array.tolist()        
        return json.dumps(d) if encode else d

        
    def __eq__(self, other):
        """Scene equality requires equality of all objects in the scene, assumes a total order of objects"""
        return isinstance(other, Scene) and len(self)==len(other) and all([obj1 == obj2 for (obj1, obj2) in zip(self, other)])

    def __repr__(self):
        """Return a summary string of the form '<vipy.image.Scene: ...>' listing only the fields that are set"""
        strlist = []
        if self.isloaded():
            strlist.append("height=%d, width=%d, color=%s" % (self.height(), self.width(), self.colorspace()))
        elif self.has_loader():
            # NOTE(review): has_loader is not among the inherited members listed in the docs - confirm it exists
            strlist.append('loaded=False')
        if self.filename() is not None:
            strlist.append('filename=%s' % (self.filename()))
        if self.hasurl():
            strlist.append('url=%s' % self.url())
        # A single image level tag is shown as the category, more than one as a truncated tag list
        if len(self.image_tags())==1:
            strlist += ['category=%s' % truncate_string(str(self.category()), 40)]
        elif len(self.image_tags())>1:
            strlist += ['tags=%s' % truncate_string(str(self.image_tags()), 40)]            
        if len(self.objects()) > 0:
            strlist.append('objects=%d' % len(self.objects()))
            
        return str('<vipy.image.Scene: %s>' % (', '.join(strlist)))

    def __len__(self):
        """The length of a scene is equal to the number of objects present in the scene (the length of the object list)"""
        return len(self._objectlist)

    def __iter__(self):
        """Iterate over each ImageDetection() in the scene"""
        for (k, im) in enumerate(self._objectlist):
            yield self.__getitem__(k)

    def __getitem__(self, k):
        """Return the kth object in the scene as a `vipy.image.Scene` object """
        assert isinstance(k, int), "Indexing by object in scene must be integer"
        return self.clone(shallow=True).objects([self._objectlist[k].clone()])

    def image_tags(self, tags=None):
        """Return the image level tags of the scene, as provided by the parent class tags().  The tags argument is forwarded to the parent"""
        return super().tags(tags)
    
    def tags(self, tags=None):
        """Get the image level tags followed by the object level tags, or set the image level tags.

        With tags=None, return the parent class tags() concatenated with `vipy.image.Scene.object_tags`.
        Otherwise forward tags to the parent class tags() and return its result.
        """
        if tags is None:
            return super().tags() + self.object_tags()
        return super().tags(tags)
    
    def load(self, verbose=False):
        """Load the image using the parent class load(), then convert objects with normalized coordinates to pixel coordinates.

        Objects that report has_normalized_coordinates() are scaled in x by the loaded array width and in y by the
        loaded array height, and their 'normalized_coordinates' attribute is deleted.  Other objects are left unchanged.

        Returns:
            This scene
        """
        super().load(verbose=verbose)
        if self.is_loaded() and self.num_objects() > 0 and any(o.has_normalized_coordinates() for o in self.objects()):
            # Normalized coordinates are in the range [0,1] relative to the (height, width) which is not known until load()
            self.objectmap(lambda o: o.scale_x(self.array().shape[1]).scale_y(self.array().shape[0]).del_attribute('normalized_coordinates') if o.has_normalized_coordinates() else o)
        return self
    
    def split(self):
        """Split a scene with K objects into a list of K `vipy.image.Scene` objects, each with one object in the scene.

        This is the list of elements produced by iterating over this scene.
        
        .. note:: The pixel buffer is shared between each split.  Use [im.clone() for im in self.split()] for an explicit copy.
        """
        return list(self)

    def split_and_recenter(self):
        """Split a scene with K objects into a list of K `vipy.image.Scene` objects, each with one object in the scene, with the scene centered on the object with zeropadding
        
        Each split is cloned and then recentered on the centroid of its bounding box.

        .. note:: The pixel buffer is shared between each split.  Use [im.clone() for im in self.split()] for an explicit copy.
        """
        # NOTE(review): every split is cloned here before recentering, so the shared buffer note above may not apply - confirm
        return [im.clone().recenter(im.boundingbox().centroid()) for im in self.split()]
    
    def append_object(self, imdet):
        """Append the provided `vipy.object.Object` to the end of the scene object list, in place, and return this scene"""
        assert isinstance(imdet, vipy.object.Object), "Invalid input"
        self._objectlist.append(imdet)
        return self

    def add_object(self, imdet):
        """Alias for `vipy.image.Scene.append_object`"""        
        return self.append_object(imdet)
    
    def objects(self, objectlist=None):
        """Get or set the list of objects in the scene.

        Args:
            objectlist: if None, act as a getter.  Otherwise a (possibly empty) list of `vipy.object.Object` that replaces the object list.

        Returns:
            As a getter, the object list itself (not a copy).  As a setter, this scene.
        """
        if objectlist is not None:
            assert isinstance(objectlist, list) and (len(objectlist) == 0 or all([isinstance(bb, vipy.object.Object) for bb in objectlist])), "Invalid object list"
            self._objectlist = objectlist
            return self
        return self._objectlist

    def objectmap(self, f):
        """Apply f to each object in the scene and replace the object list with the results.

        Args:
            f: a callable applied to every object, or a list of callables with exactly one callable per object, applied one to one in object order

        Returns:
            This scene, with the object list replaced

        Raises:
            AssertionError: if f is neither a callable nor a list of callables with one entry per object, or if any result is not a `vipy.object.Object`
        """
        if isinstance(f, list):
            # A list is not itself callable, so it must be validated separately from the single callable case
            assert len(f) == len(self._objectlist) and all(callable(g) for g in f), "Invalid input - a list must contain one callable per object"
            self._objectlist = [g(obj) for (g, obj) in zip(f, self._objectlist)]
        else:
            assert callable(f), "Invalid input - must be a callable or a list of callables"
            self._objectlist = [f(obj) for obj in self._objectlist]
        assert all([isinstance(a, vipy.object.Object) for a in self.objects()]), "Lambda function must return vipy.object.Detection"
        return self

    def objectfilter(self, f):
        """Keep only those objects for which the callable f returns exactly True, and return this scene"""
        assert callable(f)
        kept = []
        for obj in self._objectlist:
            if f(obj) is True:
                kept.append(obj)
        self._objectlist = kept
        return self

    def nms(self, conf, iou, cover=0.8):
        """Non-maximum suppression of objects() by category based on confidence and spatial IoU and cover thresholds.

        The object list is replaced by the output of `vipy.object.non_maximum_suppression` called with bycategory=True, and this scene is returned.
        """
        return self.objects( vipy.object.non_maximum_suppression(self.objects(), conf=conf, iou=iou, cover=cover, bycategory=True) )

    def intersection(self, other, miniou, bycategory=True):
        """Return a clone of this scene holding only the objects that received a greedy assignment to an object in other at the given miniou"""
        assert isinstance(other, Scene), "Invalid input"
        v = self.clone()
        assignment = greedy_assignment(v.objects(), other.objects(), miniou, bycategory=bycategory)
        matched = [k for (k, d) in enumerate(assignment) if d is not None]
        v._objectlist = [v._objectlist[k] for k in matched]
        return v

    def difference(self, other, miniou):
        """Return a clone of this scene holding only the objects that received no greedy assignment (by category) to an object in other at the given miniou"""
        assert isinstance(other, Scene), "Invalid input"
        v = self.clone()
        assignment = greedy_assignment(self.objects(), other.objects(), miniou, bycategory=True)
        unmatched = [k for (k, d) in enumerate(assignment) if d is None]
        v._objectlist = [v._objectlist[k] for k in unmatched]
        return v
        
    def union(self, other, miniou=None):
        """Combine the objects of the scene with other and self with no duplicate checking.

        Args:
            other: a `vipy.image.Image`.  Objects are appended only if other is a `vipy.image.Scene`, otherwise this scene is unchanged.
            miniou: NOTE(review): accepted but never read by this method, so no duplicate checking is performed for any value - confirm intended behavior

        Returns:
            This scene, with the objects of other appended after its own
        """
        assert isinstance(other, Image)
        if isinstance(other, Scene):
            self.objects(self.objects()+other.objects())
        return self

    def __or__(self, other):
        """Operator form of union: apply the parent class __or__ with other, then return self.union(other)"""
        super().__or__(other)
        return self.union(other)
    
    def uncrop(self, bb, shape):
        """Uncrop a previous crop(bb) called with the supplied bb=BoundingBox(), and zeropad to shape=(H,W)

        The pixels are handled by the parent class uncrop, then every object is translated by the upper left corner (xmin, ymin) of bb.
        """
        super().uncrop(bb, shape)
        return self.objectmap(lambda o: o.translate(bb.xmin(), bb.ymin()))
        
    def clear(self):
        """Remove all objects from this scene by setting an empty object list, and return this scene."""
        return self.objects([])
    
    def boundingbox(self):
        """Return the union of all object bounding boxes, or None if the scene has no objects.

        The scene is loaded first to compensate for normalized coordinates.  The returned box is a clone, so the objects are not modified.
        """
        boxes = [vipy.geometry.BoundingBox.cast(bb) for bb in self.load().objects()]
        if len(boxes) == 0:
            return None
        bb = boxes[0].clone()
        if len(boxes) == 1:
            return bb
        return bb.union(boxes[1:])

    def object_tags(self):
        """Return the list of unique tags over all objects in the scene, in order of first appearance"""
        seen = {}
        for o in self.objects():
            for t in o.tags():
                seen.setdefault(t, None)
        return list(seen)
    
    # Spatial transformation
    def _history(self, func=None, **kwargs):
        """The undo history for flush. This is useful for remote processing of images at lower resolutions and square crops without passing around the image buffer"""
        if func is not None:
            self.append_attribute('_history', (func, kwargs))
            return self
        return self.getattribute('_history')

    def flush_array(self):
        """Flush without undoing the object transformation history.  Equivalent to self.flush(undo_history=False)"""
        return self.flush(undo_history=False)
    
    def flush(self, undo_history=True):
        """Free the image buffer, and undo all of the object transformations to restore alignment with the reference image filename/url

        Args:
            undo_history: if True and a history exists, replay the recorded undo operations on every object, most recent first.

        Returns:
            The result of the parent class flush()
        """
        if undo_history and self._history() is not None:
            for (f,kwargs) in reversed(self._history()):
                # Each history entry names an object method and its keyword arguments
                self.objectmap(lambda o: getattr(o,f)(**kwargs))  # undo
        # The history is removed whether or not it was replayed
        # NOTE(review): delattribute is not among the inherited members listed in the docs - confirm it exists
        self.delattribute('_history')
        return super().flush()
    
    def imclip(self):
        """Clip every object to the image rectangle and drop those objects that do not overlap the image, then return this scene"""
        clipped = []
        for o in self._objectlist:
            if o.hasoverlap(self.numpy()):
                clipped.append(o.imclip(self.numpy()))
        self._objectlist = clipped
        return self

    def rescale(self, scale=1, interp='bilinear'):
        """Rescale the image buffer and every object by the same scale factor - Not idempotent.

        The inverse scale is recorded in the undo history.
        """
        self = super().rescale(scale, interp=interp)
        rescaled = []
        for bb in self._objectlist:
            rescaled.append(bb.rescale(scale))
        self._objectlist = rescaled
        self._history('rescale', s=1/scale)
        return self

    def resize(self, cols=None, rows=None, height=None, width=None, interp='bilinear'):
        """Resize image buffer to (height=rows, width=cols) and transform all bounding boxes accordingly.  If cols or rows is None, then scale isotropically.  cols is a synonym for width, rows is a synonym for height"""
        assert not (cols is not None and width is not None), "Define either width or cols"
        assert not (rows is not None and height is not None), "Define either height or rows"
        rows = rows if height is None else height
        cols = cols if width is None else width        
        assert cols is not None or rows is not None, "Invalid input"
        
        # Scale factors are computed from the current image size; a missing dimension borrows the other's factor
        sx = (float(cols) / self.width()) if cols is not None else None
        sy = (float(rows) / self.height()) if rows is not None else None
        sx = sy if sx is None else sx
        sy = sx if sy is None else sy        
        # Objects are scaled and the inverse scales recorded in the undo history before the pixels are resized
        self._objectlist = [bb.scale_x(sx).scale_y(sy) for bb in self._objectlist]
        self._history('scale_x', s=1/sx)._history('scale_y', s=1/sy)
        if sx == sy:
            self = super().rescale(sx, interp=interp)  # FIXME: if we call resize here, inheritance is screweed up
        else:
            self = super().resize(cols, rows, interp=interp)
        return self

    def centersquare(self):
        """Crop the image of size (H,W) to the center square using the parent class centersquare, and shift every object by the removed margin.

        The opposite shift is recorded in the undo history.
        """
        (H, W) = self.shape()
        self = super().centersquare()
        dy = (H - self.height())/2.0
        dx = (W - self.width())/2.0
        shifted = [bb.translate(-dx, -dy) for bb in self._objectlist]
        self._objectlist = shifted
        self._history('translate', dx=dx, dy=dy)
        return self
    
    def fliplr(self):
        """Mirror the buffer and every object left to right (around the vertical axis).

        Objects are flipped first, the flip is recorded in the undo history, and then the pixels are flipped by the parent class.
        """
        flipped = []
        for bb in self._objectlist:
            flipped.append(bb.fliplr(self.numpy()))
        self._objectlist = flipped
        self._history('fliplr', width=self.width())
        return super().fliplr()

    def flipud(self):
        """Mirror the buffer and every object upside down (around the horizontal axis).

        Objects are flipped first, the flip is recorded in the undo history, and then the pixels are flipped by the parent class.
        """
        flipped = []
        for bb in self._objectlist:
            flipped.append(bb.flipud(self.numpy()))
        self._objectlist = flipped
        self._history('flipud', height=self.height())
        return super().flipud()
    
    def dilate(self, s):
        """Dilate every object by the scale factor s and return this scene.  Dilated boxes may fall outside the image rectangle"""
        dilated = []
        for bb in self._objectlist:
            dilated.append(bb.dilate(s))
        self._objectlist = dilated
        return self

    def zeropad(self, padwidth, padheight):
        """Zero pad the image using the parent class zeropad, then shift every object by the leading pad.

        Args:
            padwidth: the column padding.  If this is a tuple of length two, its first element is used as the object x offset.
            padheight: the row padding.  If this is a tuple of length two, its first element is used as the object y offset.

        The opposite shift is recorded in the undo history.
        """
        self = super().zeropad(padwidth, padheight)
        (dx, dy) = [p[0] if isinstance(p, tuple) and len(p) == 2 else p for p in (padwidth, padheight)]
        shifted = [bb.translate(dx, dy) for bb in self._objectlist]
        self._objectlist = shifted
        self._history('translate', dx=-dx, dy=-dy)
        return self

    def meanpad(self, padwidth, padheight, mu=None):
        """Mean pad the image using the parent class meanpad, then shift every object by the leading pad.

        Args:
            padwidth: the column padding.  If this is a tuple of length two, its first element is used as the object x offset.
            padheight: the row padding.  If this is a tuple of length two, its first element is used as the object y offset.
            mu: forwarded to the parent class meanpad

        The opposite shift is recorded in the undo history.
        """
        self = super().meanpad(padwidth, padheight, mu=mu)
        (dx, dy) = [p[0] if isinstance(p, tuple) and len(p) == 2 else p for p in (padwidth, padheight)]
        shifted = [bb.translate(dx, dy) for bb in self._objectlist]
        self._objectlist = shifted
        self._history('translate', dx=-dx, dy=-dy)
        return self

    def rot90cw(self):
        """Rotate the scene 90 degrees clockwise, and update objects.

        The pixel array is replaced by three counterclockwise quarter turns, every object is rotated using the
        shape before rotation, and the inverse rotation is recorded in the undo history with the swapped shape.
        """
        (H, W) = self.shape()
        rotated = np.rot90(self.numpy(), k=3)
        self.array(rotated)
        self._objectlist = [bb.rot90cw(H, W) for bb in self._objectlist]
        self._history('rot90ccw', H=W, W=H)
        return self

    def rot90ccw(self):
        """Rotate the scene 90 degrees counterclockwise, and update objects.

        The pixel array is replaced by one counterclockwise quarter turn, every object is rotated using the
        shape before rotation, and the inverse rotation is recorded in the undo history with the swapped shape.
        """
        (H, W) = self.shape()
        rotated = np.rot90(self.numpy(), k=1)
        self.array(rotated)
        self._objectlist = [bb.rot90ccw(H, W) for bb in self._objectlist]
        self._history('rot90cw', H=W, W=H)
        return self

    def maxdim(self, dim=None, interp='bilinear'):
        """Get the maximum image dimension, or resize so that the maximum dimension equals dim.

        With dim=None, return max(self.shape()).  Otherwise return the result of the parent class maxdim.
        """
        if dim is None:
            return max(self.shape())
        return super().maxdim(dim, interp=interp)  # will call self.rescale() which will update boxes

    def mindim(self, dim=None, interp='bilinear'):
        """Get the minimum image dimension, or resize so that the minimum dimension equals dim.

        With dim=None, return min(self.shape()).  Otherwise return the result of the parent class mindim.
        """
        if dim is None:
            return min(self.shape())
        return super().mindim(dim, interp=interp)  # will call self.rescale() which will update boxes

    def crop(self, bbox=None):
        """Crop the image buffer using the supplied bounding box object (or the only object if bbox=None), clipping the box to the image rectangle, update all scene objects

        Args:
            bbox: the crop box.  If None, the scene must contain exactly one object, and the first object that is a `vipy.geometry.BoundingBox` is used.

        Returns:
            This scene, with every object translated so that the upper left corner of bbox becomes the origin
        """
        assert bbox is not None or (len(self) == 1), "Bounding box must be provided if number of objects != 1"
        # Indexing [0] raises IndexError if the only object is not a BoundingBox
        bbox = bbox if bbox is not None else [o for o in self._objectlist if isinstance(o, vipy.geometry.BoundingBox)][0]
        self = super()._crop(bbox)        
        (dx, dy) = (bbox.xmin(), bbox.ymin())
        self._objectlist = [bb.translate(-dx, -dy) for bb in self._objectlist]
        # Record the opposite translation so that flush can undo the crop offset
        self._history('translate', dx=dx, dy=dy)                        
        return self

    def objectcrop(self, dilate=1.0):
        """Crop the image to the `vipy.image.Scene.boundingbox` dilated by the given factor, using padcrop.

        If the scene has no bounding box, this scene is returned unchanged.
        """
        bb = self.boundingbox()
        if bb is None:
            return self
        return self.padcrop(bb.dilate(dilate))

    def objectsquare(self, dilate=1.0):
        """Pad-crop the image to the scene `boundingbox` dilated by the given factor and then expanded with maxsquare().  Returns self unchanged if the scene has no bounding box."""
        box = self.boundingbox()
        if box is None:
            return self
        square = box.dilate(dilate).maxsquare()
        return self.padcrop(square)
    
    def centercrop(self, height, width):
        """Crop a (height x width) window whose centroid is the image center"""
        xcenter = float(self.width() / 2.0)
        ycenter = float(self.height() / 2.0)
        window = BoundingBox(xcentroid=xcenter, ycentroid=ycenter, width=int(width), height=int(height))
        return self.crop(window)

    def cornercrop(self, height, width):
        """Crop a (height x width) window anchored at the upper left corner of the image, using `crop` so only valid pixels are returned"""
        window = BoundingBox(xmin=0, ymin=0, width=int(width), height=int(height))
        return self.crop(window)
    
    def padcrop(self, bbox):
        """Crop the image buffer using the supplied bounding box object, zero padding if box is outside image rectangle, update all scene objects.

        Args:
            bbox: the crop box, which may extend beyond the image rectangle.  A clone is used internally.

        Returns:
            This scene after zero padding, cropping and translating the objects
        """
        bbox = bbox.clone()
        # Padding required per axis: the larger of the overhang before the origin and the overhang past the far edge, never negative
        dx = int(max(0, max(0-bbox.xmin(), bbox.xmax()-self.width())))
        dy = int(max(0, max(0-bbox.ymin(), bbox.ymax()-self.height())))
        # Pad first, then crop with the box shifted by the padding
        self.zeropad(dx,dy)._crop(bbox.translate(dx=dx, dy=dy))
        # NOTE(review): (dx, dy) are rebound to the box corner read after the translate() call above;
        # whether translate() updates bbox in place is not visible here - confirm against vipy.geometry.BoundingBox
        (dx, dy) = (bbox.xmin(), bbox.ymin())
        self._objectlist = [bb.translate(-dx, -dy) for bb in self._objectlist] # after crop        
        self._history('translate', dx=dx, dy=dy)                                
        return self

    def cornerpadcrop(self, height, width):
        """Crop a (height x width) window anchored at the upper left corner of the image, using `padcrop` so the result is zero padded out to (height, width)"""
        window = BoundingBox(xmin=0, ymin=0, width=width, height=height)
        return self.padcrop(window)
    
    # Image export
    def rectangular_mask(self, W=None, H=None):
        """Return a binary mask with ones inside every object in the scene.

        Args:
            W: [int] mask width.  Only used if H is also provided, otherwise the image width is used.
            H: [int] mask height.  Only used if W is also provided, otherwise the image height is used.

        Returns:
            A uint8 numpy array of shape (H, W), equal to one inside each bounding box (clipped to the mask rectangle) and inside the circle of each 2D point, zero elsewhere

        .. note:: Boxes are clipped against the mask itself, so when both W and H are given this method does not read the pixel buffer
        """
        if (W is None or H is None):
            (H, W) = (int(np.round(self.height())),
                      int(np.round(self.width())))
        immask = np.zeros((H, W)).astype(np.uint8)
        for o in self._objectlist:
            if isinstance(o, vipy.geometry.BoundingBox) and o.hasoverlap(immask):
                # Clip to the (H, W) mask rectangle rather than self.numpy(): this keeps the clip consistent
                # with the overlap test above and avoids loading the image when the size was provided
                bbm = o.clone().imclip(immask).int()
                immask[bbm.ymin():bbm.ymax(), bbm.xmin():bbm.xmax()] = 1
            if isinstance(o, vipy.geometry.Point2d) and o.boundingbox().hasoverlap(immask):
                mask = vipy.calibration.circle(o.x, o.y, o.r, W, H)
                immask[mask>0] = 1
        return immask

    def binarymask(self):
        """Overwrite the pixel buffer in place with `rectangular_mask`, broadcast across channels for multi-channel images"""
        is_single_channel = (self.channels() == 1)
        mask = self.rectangular_mask()
        if not is_single_channel:
            mask = np.expand_dims(mask, axis=2)  # (H,W) -> (H,W,1) so that it broadcasts over channels
        pixels = self.numpy()
        pixels[:] = mask[:]  # in-place update
        return self
        
    def bgmask(self):
        """Zero out, in place, every pixel that is outside the `rectangular_mask` of the scene objects"""
        is_single_channel = (self.channels() == 1)
        mask = self.rectangular_mask()
        if not is_single_channel:
            mask = np.expand_dims(mask, axis=2)  # broadcast over channels
        pixels = self.numpy()
        pixels[:] = np.multiply(pixels, mask)  # in-place update
        return self

    def fgmask(self):
        """Zero out, in place, every pixel that is inside the `rectangular_mask` of the scene objects"""
        is_single_channel = (self.channels() == 1)
        mask = self.rectangular_mask()
        if not is_single_channel:
            mask = np.expand_dims(mask, axis=2)  # broadcast over channels
        pixels = self.numpy()
        background = 1.0-mask  # one outside the objects, zero inside
        pixels[:] = np.multiply(pixels, background)  # in-place update
        return self
    
    def pixelmask(self, pixelsize=8):
        """Pixelate the foreground in place: pixels inside the `rectangular_mask` are replaced by a copy of the image downscaled by 1/pixelsize and upscaled back with nearest neighbor interpolation (e.g. like privacy glass)"""
        assert pixelsize > 1, "Pixelsize is a scale factor such that pixels within the foreground are pixelsize times larger than the background"
        pixels = self.numpy()  # force writeable
        mask = self.rectangular_mask()
        coarse = self.clone().rescale(1.0/pixelsize, interp='nearest').resize_like(self, interp='nearest').numpy()
        pixels[mask > 0] = coarse[mask > 0]  # in-place update
        return self

    def pixelize(self, radius=16):
        """Alias for `pixelmask`, where radius is passed as the pixelsize"""
        pixelsize = radius
        return self.pixelmask(pixelsize=pixelsize)
    def pixelate(self, radius=16):
        """Alias for `pixelmask`, where radius is passed as the pixelsize"""
        pixelsize = radius
        return self.pixelmask(pixelsize=pixelsize)
        
    
    def blurmask(self, radius=7):
        """Blur the foreground in place: pixels inside the `rectangular_mask` are replaced by the corresponding pixels of a blurred copy of the image"""
        pixels = self.numpy()  # force writeable
        mask = self.rectangular_mask()
        blurred = self.clone().blur(radius).numpy()
        pixels[mask > 0] = blurred[mask > 0]  # in-place update
        return self

    def blurmask_only(self, categories, radius=7):
        """Blur only the foreground objects whose category is in the provided categories.

        Args:
            categories: a container of category names, tested with `in`
            radius: the blur radius passed to `blurmask`, must be greater than one

        Returns:
            A clone of this scene in which only the selected objects were blurred.  The clone carries the full original object list.
        """
        assert radius > 1, "Blur radius must be greater than one"

        objects = self.objects()
        selected = [o for o in objects if o.category() in categories]
        # Temporarily restrict the clone to the selected objects so that blurmask() only touches them, then restore all objects
        return self.clone().objects(selected).blurmask(radius=radius).objects(objects)
    
    def replace(self, newim, broadcast=False):
        """Overwrite the pixels inside every object box with pixels from newim.

        Args:
            newim: a `vipy.image.Image` providing the replacement pixels
            broadcast: if False, copy the same box region from newim.  If True, resize a clone of newim to the box size and paste it into the box.

        Returns:
            This scene, with the pixel buffer updated in place.  Each object is clipped with imclip() against both image arrays.
        """
        assert isinstance(newim, vipy.image.Image), "Invalid replacement image - Must be vipy.image.Image"
        img = self.numpy()
        newimg = newim.array()
        for d in self._objectlist:
            d.imclip(newimg).imclip(img)
            rows = slice(int(d.ymin()), int(d.ymax()))
            cols = slice(int(d.xmin()), int(d.xmax()))
            if broadcast:
                patch = newim.clone().resize(int(d.width()), int(d.height())).array()
            else:
                patch = newimg[rows, cols]
            img[rows, cols] = patch
        return self
    
    def meanmask(self):
        """Fill, in place, every pixel inside the `rectangular_mask` of the scene objects with the value returned by meanchannel()"""
        pixels = self.numpy()  # force writeable
        fill = self.meanchannel()
        pixels[self.rectangular_mask() > 0] = fill  # in-place update
        return self

    
    def perceptualhash(self, bits=128, asbinary=False, asbytes=False, objmask=False):
        """Perceptual differential hash of the image.

        The image is (optionally) mean-masked over the foreground objects, resized to a small square, converted to greyscale, and the signs of the
        gradients along both image axes are concatenated to form the hash bits.

        Args:
            bits: [int]  The hash length, must be 2*k*k for k in 2..16.  Longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
            asbinary: [bool] If true, return a binary array
            asbytes: [bool] if true return a byte array.  Takes precedence over asbinary.
            objmask: [bool] if true, replace the foreground object masks with the mean color prior to computing

        Returns:
            A hash string encoding the perceptual hash such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
            asbytes: a bytes array
            asbinary: a numpy binary array

        .. notes::
            - Can be used for near duplicate detection of background scenes by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.  Equivalently, `vipy.image.Image.perceptualhash_distance`.
            - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex( bghash() )) which is equivalent to perceptualhash(asbinary=True)

        """
        allowablebits = [2*k*k for k in range(2, 17)]
        assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
        side = int(np.ceil(np.sqrt(bits/2.0)))  # side*side gradient signs per axis
        im = self.clone().meanmask() if objmask else self.clone()
        grey = im.resize(cols=side+1, rows=side+1).greyscale().numpy()
        signs = (np.dstack(np.gradient(grey))[0:-1, 0:-1] > 0).flatten()
        if asbytes:
            return bytes(np.packbits(signs))
        if asbinary:
            return signs
        return bytes(np.packbits(signs)).hex()

    def fghash(self, bits=8, asbinary=False, asbytes=False):
        """Return a list with one perceptual hash per object, each computed on the crop of that object without mean masking"""
        hashes = []
        for im in self:
            hashes.append(im.crop().perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=False))
        return hashes

    
    def bghash(self, bits=128, asbinary=False, asbytes=False):
        """Perceptual differential hash of the background, computed on a greyscale clone with the foreground objects mean masked"""
        grey = self.clone().greyscale()
        return grey.perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=True)
        
    def isduplicate(self, im, threshold, bits=128):
        """Near duplicate detection: returns true if the `perceptualhash_distance` between the bghash of self and of im is strictly less than threshold"""
        assert isinstance(im, Image), "Invalid input"
        (mine, theirs) = (self.bghash(bits=bits), im.bghash(bits=bits))
        distance = vipy.image.Image.perceptualhash_distance(mine, theirs)
        return distance < threshold
    
        
    def show(self, categories=None, figure=1, nocaption=False, nocaption_withstring=[], fontsize=10, boxalpha=0.15, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, captionoffset=(3,-18), nowindow=False, shortlabel=None, timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Show the scene with its objects overlaid

        Args:
           - categories: [list]  List of category names in the scene to show.  If None, show all objects.
           - figure: [int|str] Figure number or title, show the image in the provided figure=int numbered window
           - nocaption: [bool]  Show or do not show the text caption in the upper left of the box 
           - nocaption_withstring: [list]:  Do not show captions for those object categories containing any of the strings in the provided list
           - fontsize: [int] or [str]: Size of the font, fontsize=int for points, fontsize='NN:scaled' to scale the font relative to the image size (NN is multiplied by min(image shape)/640)
           - boxalpha (float, [0,1]):  Set the text box background to be semi-transparent with an alpha
           - d_category2color (dict):  Define a dictionary of required mapping of specific category() to box colors.  Non-specified categories are assigned a named color chosen by hashing the category name, so the same category always maps to the same color
           - captionoffset (int, int): The relative position of the caption to the upper right corner of the box.
           - nowindow (bool):  Display or not display the image, used by `vipy.image.Scene.annotation`
           - shortlabel (dict):  An optional dictionary mapping category names to short names easier to display 
           - timestamp: An optional timestamp forwarded to `vipy.show.imobjects`
           - mutator (lambda):  A lambda function with signature lambda im: f(im) which will modify this image prior to show.  Useful for changing labels on the fly
           - timestampoffset (tuple): (x,y) coordinate offsets to shift the upper left corner timestamp
           - theme [str]: If 'dark' use dark mode, if 'light' use light mode to visualize captions with high contrast dark or light foregrounds 

        Returns:
            This scene, unmodified.  The display is generated from a clone.
        """
        # Theme dependent colors for boxes, captions and the timestamp
        colors = vipy.show.colorlist(theme)
        all_colors = vipy.show.colorlist()        
        textfacecolor = 'black' if theme=='dark' else 'white'
        timestampcolor = 'white' if theme=='dark'  else 'black'
        timestampfacecolor = 'black' if theme=='dark' else 'white'        
        textfacealpha = 0.8 if theme=='dark' else 0.85
        
        # All display work is done on a clone, optionally transformed by the mutator
        im = self.clone() if not mutator else mutator(self.clone())
        imdisplay = im.rgb() if im.colorspace() != 'rgb' else im.load()  # convert to RGB for show() if necessary
        
        valid_objects = [obj.clone() for obj in imdisplay.objects() if categories is None or obj.category() in tolist(categories)]  # Objects with valid category
        valid_objects = [obj.imclip(self.numpy()) for obj in valid_objects if obj.hasoverlap(self.numpy())]  # Objects within image rectangle
        valid_objects = [obj.new_category(shortlabel[obj.category()]) for obj in valid_objects] if shortlabel else valid_objects  # Display name as shortlabel?
        # Category -> color: sha1 of the category name indexes into the color list, then explicit d_category2color entries are merged in last
        d_det_category_to_color = {d.category():colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(colors)] for d in valid_objects if isinstance(d, vipy.object.Detection)}
        d_kp_category_to_color = {d.category():all_colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(all_colors)] for d in valid_objects if isinstance(d, vipy.object.Keypoint2d)}        
        d_category_to_color = mergedict(d_kp_category_to_color, d_det_category_to_color, d_category2color)
        
        # Colors are looked up before categories are removed for objects that should be shown without a caption
        object_color = [d_category_to_color[d.category()] for d in valid_objects]                
        valid_objects  = [d if not any([c in d.category() for c in tolist(nocaption_withstring)]) else d.nocategory() for d in valid_objects]  # Objects requested to show without caption

        fontsize_scaled = float(fontsize.split(':')[0])*(min(imdisplay.shape())/640.0) if isstring(fontsize) else fontsize
        vipy.show.imobjects(imdisplay._array, valid_objects, bordercolor=object_color, textcolor=object_color, fignum=figure, do_caption=(nocaption==False), facealpha=boxalpha, fontsize=fontsize_scaled,
                            captionoffset=captionoffset, nowindow=nowindow, textfacecolor=textfacecolor, textfacealpha=textfacealpha, timestamp=timestamp,
                            timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestampoffset=timestampoffset)
        return self

    def annotate(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Alias for `vipy.image.Scene.savefig`.  Every argument is forwarded unchanged by keyword."""
        options = dict(outfile=outfile,
                       categories=categories,
                       figure=figure,
                       nocaption=nocaption,
                       fontsize=fontsize,
                       boxalpha=boxalpha,
                       d_category2color=d_category2color,
                       captionoffset=captionoffset,
                       dpi=dpi,
                       shortlabel=shortlabel,
                       nocaption_withstring=nocaption_withstring,
                       timestamp=timestamp,
                       theme=theme,
                       timestampoffset=timestampoffset,
                       mutator=mutator)
        return self.savefig(**options)

    def savefig(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, textfacecolor='white', shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Save `vipy.image.Scene.show` output to given file or return buffer without popping up a window.

        Args:
            outfile: [str] the image file to write.  If None, the rendered figure is returned as an image instead.
            figure: [int] the figure number to draw in.  If None, figure 1 is used and is closed after the buffer is extracted.
            dpi: [int] resolution used when writing outfile
            textfacecolor: accepted for backwards compatibility, not used by this method
            All other arguments are forwarded to `vipy.image.Scene.show`.

        Returns:
            If outfile is None, a `vipy.image.Image` in RGB colorspace containing the rendered figure, otherwise outfile
        """
        fignum = figure if figure is not None else 1
        self.show(categories=categories, figure=fignum, nocaption=nocaption, fontsize=fontsize, boxalpha=boxalpha,
                  d_category2color=d_category2color, captionoffset=captionoffset, nowindow=True,
                  shortlabel=shortlabel, nocaption_withstring=nocaption_withstring, timestamp=timestamp,
                  mutator=mutator, timestampoffset=timestampoffset, theme=theme)

        if outfile is None:
            buf = io.BytesIO()
            (W,H) = plt.figure(num=fignum).canvas.get_width_height()  # fast(ish)
            plt.figure(num=fignum).canvas.print_raw(buf)  # fast(ish), FIXME: there is a bug here with captions showing behind bboxes on macos
            img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))
            if figure is None:
                vipy.show.close(plt.gcf().number)   # memory cleanup (useful for video annotation on last frame)
            return vipy.image.Image(array=img, colorspace='rgba').rgb()
        else:
            # Save the figure that show() actually drew in (fignum), rather than forwarding a possibly None figure
            vipy.show.savefig(os.path.abspath(os.path.expanduser(outfile)), fignum, dpi=dpi, bbox_inches='tight', pad_inches=0)
            return outfile

Ancestors

TaggedImage
Labeled
Image

Subclasses

ImageDetection

Instance variables

var attributes

Expand source code Browse git

class Scene(TaggedImage):
    """vipy.image.Scene class

    This class provides a representation of a vipy.image.TaggedImage with one or more vipy.object.Object.  The goal of this class is to provide a unified representation for all objects in a scene.

    Valid constructors include all provided by vipy.image.Image() and vipy.image.ImageCategory() with the additional kwarg 'objects', which is a list of vipy.object.Object()

    ```python
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city', objects=[vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='city').objects([vipy.object.Detection(category='vehicle', xmin=0, ymin=0, width=100, height=100)])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[0,0,100,100])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels='face', xywh=[[0,0,100,100], [100,100,200,200]])
    im = vipy.image.Scene(filename='/path/to/city_image.jpg', category='office', boxlabels=['face', 'desk'], xywh=[[0,0,100,100], [200,200,300,300]])
    ```

    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes', '_objectlist')
    
    def __init__(self, filename=None, url=None, category=None, attributes=None, objects=None, xywh=None, boxlabels=None, array=None, colorspace=None, tags=None):
        """Create a scene from an image source and an optional set of objects.

        Args:
            filename, url, category, attributes, array, colorspace, tags: forwarded to the parent constructor
            objects: [list] a list of `vipy.object.Object`
            xywh: a single box [x,y,w,h] or a list/tuple of boxes [[x,y,w,h], ...], each converted to a `vipy.object.Detection`
            boxlabels: [str] a single category applied to every xywh box, or a list/tuple with one category per xywh box

        Raises:
            ValueError: if objects is not a list of `vipy.object.Object`, if xywh is malformed, or if the number of boxlabels does not equal the number of xywh boxes
        """
        super().__init__(filename=filename, url=url, attributes=attributes, tags=tags, category=category, array=array, colorspace=colorspace)
        self._objectlist = []

        if objects is not None:
            if not (isinstance(objects, list) and all([isinstance(bb, vipy.object.Object) for bb in objects])):
                raise ValueError("Invalid object list - Input must be [vipy.object.Object, ...]")
            self._objectlist = objects

        detlist = []
        if xywh is not None:
            if (islistoflists(xywh) or istupleoftuples(xywh)) and all([len(bb)==4 for bb in xywh]):
                detlist = [vipy.object.Detection(category=None, xywh=bb) for bb in xywh]
            elif (islist(xywh) or isinstance(xywh, tuple)) and len(xywh)==4 and all([isnumber(bb) for bb in xywh]):
                detlist = [vipy.object.Detection(category=None, xywh=xywh)]
            else:
                raise ValueError("Invalid xywh list - Input must be [[x1,y1,w1,h1], ...")
        if boxlabels is not None:
            if isstring(boxlabels):
                detlist = [d.new_category(boxlabels) for d in detlist]
            elif (isinstance(boxlabels, tuple) or islist(boxlabels)) and len(boxlabels) == len(detlist):
                # Compare against the number of boxes actually created: len(xywh) is 4 for a single flat box and fails when xywh is None
                detlist = [d.new_category(label) for (d,label) in zip(detlist, boxlabels)]
            else:
                raise ValueError("Invalid boxlabels list - len(boxlabels) must be len(xywh) with corresponding labels for each xywh box  [label1, label2, ...]")

        self._objectlist = self._objectlist + detlist

        
    @classmethod
    def cast(cls, im):
        """Return im itself if its class is exactly `vipy.image.Scene`, otherwise a new cls built from the filename, url, attributes, array, colorspace and loader of im"""
        assert isinstance(im, vipy.image.Image), "Invalid input - must be derived from vipy.image.Image"
        if im.__class__ == vipy.image.Scene:
            return im
        converted = cls(filename=im._filename, url=im._url, attributes=im.attributes, array=im._array, colorspace=im._colorspace)
        return converted.loader(*im._loader)
    
    @classmethod
    def from_json(obj, s):
        """Reconstruct a `vipy.image.Scene` from a JSON string or an already decoded dictionary, as produced by `json`"""
        im = super().from_json(s)
        im.__class__ = vipy.image.Scene
        decoded = s if isinstance(s, dict) else json.loads(s)
        d = {k.lstrip('_'):v for (k,v) in decoded.items()}  # prettyjson (remove "_" prefix to attributes)
        if 'objectlist' in d and isinstance(d['objectlist'], dict):
            # Version 1.15.1: expanded serialization to support multiple object types
            serialized = d['objectlist']
            detections = [vipy.object.Detection.from_json(x) for x in serialized['Detection']] if 'Detection' in serialized else []
            keypoints = [vipy.object.Keypoint2d.from_json(x) for x in serialized['Keypoint2d']] if 'Keypoint2d' in serialized else []
            im._objectlist = detections + keypoints
        else:
            # Legacy support: 1.14.4
            im._objectlist = [vipy.object.Detection.from_json(x) for x in d['objectlist']]
        return im

    def __json__(self):
        """Serialization method for json package"""
        return self.json(encode=True)

    def num_objects(self):
        """Return the number of objects in the scene"""
        count = len(self._objectlist)
        return count
    
    def json(self, encode=True):
        d = {k.lstrip('_'):getattr(self, k) for k in Scene.__slots__ if getattr(self, k) is not None}  # prettyjson (remove "_" prefix to attributes)          
        d['objectlist'] = {'Detection': [bb.json(encode=False) for bb in self._objectlist if isinstance(bb, vipy.object.Detection)],
                           'Keypoint2d': [p.json(encode=False) for p in self._objectlist if isinstance(p, vipy.object.Keypoint2d)]}
        d['objectlist'] = {k:v for (k,v) in  d['objectlist'].items() if len(v) > 0}  # cleanup empty lists
        if 'attributes' in d and len(d['attributes'])==0:  # cleanup empty attributes
            del d['attributes']  # will be recreated in from_json
        if 'array' in d and d['array'] is not None:
            if self.hasfilename() or self.hasurl():
                log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() or self.save(), then reload the image from backing filename/url after json import')            
            d['array'] = self._array.tolist()        
        return json.dumps(d) if encode else d

        
    def __eq__(self, other):
        """Scene equality requires equality of all objects in the scene, assumes a total order of objects"""
        return isinstance(other, Scene) and len(self)==len(other) and all([obj1 == obj2 for (obj1, obj2) in zip(self, other)])

    def __repr__(self):
        strlist = []
        if self.isloaded():
            strlist.append("height=%d, width=%d, color=%s" % (self.height(), self.width(), self.colorspace()))
        elif self.has_loader():
            strlist.append('loaded=False')
        if self.filename() is not None:
            strlist.append('filename=%s' % (self.filename()))
        if self.hasurl():
            strlist.append('url=%s' % self.url())
        if len(self.image_tags())==1:
            strlist += ['category=%s' % truncate_string(str(self.category()), 40)]
        elif len(self.image_tags())>1:
            strlist += ['tags=%s' % truncate_string(str(self.image_tags()), 40)]            
        if len(self.objects()) > 0:
            strlist.append('objects=%d' % len(self.objects()))
            
        return str('<vipy.image.Scene: %s>' % (', '.join(strlist)))

    def __len__(self):
        """The length of a scene is equal to the number of objects present in the scene"""
        return len(self._objectlist)

    def __iter__(self):
        """Iterate over each ImageDetection() in the scene"""
        for (k, im) in enumerate(self._objectlist):
            yield self.__getitem__(k)

    def __getitem__(self, k):
        """Return the kth object in the scene as a `vipy.image.Scene` object """
        assert isinstance(k, int), "Indexing by object in scene must be integer"
        return self.clone(shallow=True).objects([self._objectlist[k].clone()])

    def image_tags(self, tags=None):
        """Image level tags of the scene: forwards tags to the parent tags() method and returns its result"""
        result = super().tags(tags)
        return result
    
    def tags(self, tags=None):
        """With no argument, return the image level tags followed by the object level tags.  With an argument, forward it to the parent tags() method."""
        if tags is None:
            return super().tags() + self.object_tags()
        return super().tags(tags)
    
    def load(self, verbose=False):
        """Load the image, then convert any objects with normalized coordinates to pixel coordinates using the loaded image size"""
        super().load(verbose=verbose)

        def to_pixels(o):
            # Normalized coordinates are in the range [0,1] relative to the (height, width) which is not known until load()
            if not o.has_normalized_coordinates():
                return o
            return o.scale_x(self.array().shape[1]).scale_y(self.array().shape[0]).del_attribute('normalized_coordinates')

        if self.is_loaded() and self.num_objects() > 0 and any(o.has_normalized_coordinates() for o in self.objects()):
            self.objectmap(to_pixels)
        return self
    
    def split(self):
        """Return a list with one `vipy.image.Scene` per object, in object order, as produced by iterating over this scene.

        .. note:: The pixel buffer is shared between each split.  Use [im.clone() for im in self.split()] for an explicit copy.
        """
        return [im for im in self]

    def split_and_recenter(self):
        """Split the scene into one scene per object, then recenter a clone of each split on the centroid of its bounding box.

        Returns:
            A list of K scenes for a scene with K objects.  Each element is a clone() of the corresponding `split` element, after recenter().
        """
        recentered = []
        for im in self.split():
            recentered.append(im.clone().recenter(im.boundingbox().centroid()))
        return recentered
    
    def append_object(self, imdet):
        """Append the provided `vipy.object.Object` to the end of the scene object list and return this scene"""
        assert isinstance(imdet, vipy.object.Object), "Invalid input"
        objectlist = self._objectlist
        objectlist.append(imdet)  # in-place update of the existing list
        return self

    def add_object(self, imdet):
        """Alias for `append_object`"""
        result = self.append_object(imdet)
        return result
    
    def objects(self, objectlist=None):
        """Getter/setter for the scene objects.

        Args:
            objectlist: [list] if provided, a list of `vipy.object.Object` which replaces the current object list

        Returns:
            The current object list if objectlist is None, otherwise this scene
        """
        if objectlist is not None:
            assert isinstance(objectlist, list) and (len(objectlist) == 0 or all([isinstance(bb, vipy.object.Object) for bb in objectlist])), "Invalid object list"
            self._objectlist = objectlist
            return self
        return self._objectlist

    def objectmap(self, f):
        """Apply a function to each object, replacing the object with the function result.

        Args:
            f: a callable with signature f(obj) -> `vipy.object.Object` applied to every object, or a list of such callables applied one to one with the objects

        Returns:
            This scene with the object list replaced by the mapped objects

        Raises:
            AssertionError: if f is neither a callable nor a list of callables of the same length as the object list, or if a result is not a `vipy.object.Object`
        """
        if isinstance(f, list):
            # One callable per object: checked explicitly since a list is not itself callable
            assert all(callable(g) for g in f), "Input must be a callable or a list of callables"
            assert len(f) == len(self._objectlist), "A list of callables must have one callable per object"
            self._objectlist = [g(obj) for (g,obj) in zip(f, self._objectlist)]
        else:
            assert callable(f), "Input must be a callable or a list of callables"
            self._objectlist = [f(obj) for obj in self._objectlist]
        assert all([isinstance(a, vipy.object.Object) for a in self.objects()]), "Lambda function must return vipy.object.Object"
        return self

    def objectfilter(self, f):
        """Keep only the objects for which f(obj) returns exactly True (identity test, truthy values other than True are rejected)"""
        assert callable(f)
        kept = []
        for obj in self._objectlist:
            if f(obj) is True:
                kept.append(obj)
        self._objectlist = kept
        return self

    def nms(self, conf, iou, cover=0.8):
        """Replace the scene objects with the result of `vipy.object.non_maximum_suppression` applied by category with the given confidence, IoU and cover thresholds"""
        survivors = vipy.object.non_maximum_suppression(self.objects(), conf=conf, iou=iou, cover=cover, bycategory=True)
        return self.objects(survivors)

    def intersection(self, other, miniou, bycategory=True):
        """Return a clone of this scene keeping only the objects that greedy_assignment() assigned to an object in other at the given miniou"""
        assert isinstance(other, Scene), "Invalid input"
        v = self.clone()
        assignment = greedy_assignment(v.objects(), other.objects(), miniou, bycategory=bycategory)
        matched = [k for (k, d) in enumerate(assignment) if d is not None]
        v._objectlist = [v._objectlist[k] for k in matched]
        return v

    def difference(self, other, miniou):
        """Return a clone of this scene keeping only the objects that greedy_assignment() (by category) left unassigned to any object in other at the given miniou"""
        assert isinstance(other, Scene), "Invalid input"
        v = self.clone()
        assignment = greedy_assignment(self.objects(), other.objects(), miniou, bycategory=True)
        unmatched = [k for (k, d) in enumerate(assignment) if d is None]
        v._objectlist = [v._objectlist[k] for k in unmatched]
        return v
        
    def union(self, other, miniou=None):
        """Combine the objects of other into this scene.

        Args:
            other: a `vipy.image.Image`.  Objects are only added if other is a `vipy.image.Scene`.
            miniou: [float] if None, all objects of other are appended with no duplicate checking.  Otherwise, only the objects of other
                that greedy_assignment() (by category) leaves unassigned to an object of this scene at this IoU are appended.

        Returns:
            This scene, with the object list extended
        """
        assert isinstance(other, Image)
        if isinstance(other, Scene):
            incoming = other.objects()
            if miniou is not None:
                # Same pattern as difference(): an assignment of None means no counterpart in this scene, so the object is not a duplicate
                assignment = greedy_assignment(incoming, self.objects(), miniou, bycategory=True)
                incoming = [incoming[k] for (k, d) in enumerate(assignment) if d is None]
            self.objects(self.objects()+incoming)
        return self

    def __or__(self, other):
        """The | operator: apply the parent __or__ and then merge the objects of other into this scene with `union`"""
        super().__or__(other)
        merged = self.union(other)
        return merged
    
    def uncrop(self, bb, shape):
        """Uncrop a previous crop(bb) called with the supplied bb=BoundingBox(), zeropad to shape=(H,W), and translate every object by the upper left corner of bb"""
        super().uncrop(bb, shape)

        def shift(o):
            return o.translate(bb.xmin(), bb.ymin())

        return self.objectmap(shift)
        
    def clear(self):
        """Replace the scene object list with a new empty list"""
        nothing = []
        return self.objects(nothing)
    
    def boundingbox(self):
        """Return the union of all object bounding boxes, or None if the scene has no objects.  The scene is loaded first to compensate for normalized coordinates."""
        boxes = [vipy.geometry.BoundingBox.cast(obj) for obj in self.load().objects()]
        if len(boxes) < 1:
            return None
        first = boxes[0].clone()
        if len(boxes) >= 2:
            return first.union(boxes[1:])
        return first

    def object_tags(self):
        """Return the unique tags of all objects in the scene, in order of first appearance"""
        alltags = [tag for obj in self.objects() for tag in obj.tags()]
        unique = {}
        for tag in alltags:
            unique.setdefault(tag, None)  # dict keys preserve insertion order
        return list(unique)
    
    # Spatial transformation
    def _history(self, func=None, **kwargs):
        """The undo history for flush. This is useful for remote processing of images at lower resolutions and square crops without passing around the image buffer"""
        if func is not None:
            self.append_attribute('_history', (func, kwargs))
            return self
        return self.getattribute('_history')

    def flush_array(self):
        """Call `flush` with undo_history=False, so that the recorded object transformations are not undone"""
        result = self.flush(undo_history=False)
        return result
    
    def flush(self, undo_history=True):
        """Free the image buffer, and (if undo_history) replay the recorded history in reverse on every object to restore alignment with the reference image filename/url.

        The '_history' attribute is deleted in either case.
        """
        if undo_history and self._history() is not None:
            for (name, arguments) in reversed(self._history()):
                # objectmap applies the function immediately, so binding the loop variables in the closure is safe
                self.objectmap(lambda o: getattr(o, name)(**arguments))  # undo
        self.delattribute('_history')
        return super().flush()
    
    def imclip(self):
        """Drop every object with no overlap with the image, and clip the remaining objects to the image rectangle"""
        clipped = []
        for obj in self._objectlist:
            if obj.hasoverlap(self.numpy()):
                clipped.append(obj.imclip(self.numpy()))
        self._objectlist = clipped
        return self

    def rescale(self, scale=1, interp='bilinear'):
        """Rescale the image buffer and every object by the same scale factor, and record the inverse scale in the history - Not idempotent"""
        self = super().rescale(scale, interp=interp)
        rescaled = [obj.rescale(scale) for obj in self._objectlist]
        self._objectlist = rescaled
        self._history('rescale', s=1/scale)  # inverse scale factor
        return self

    def resize(self, cols=None, rows=None, height=None, width=None, interp='bilinear'):
        """Resize the image buffer to (height=rows, width=cols) and scale all objects accordingly.

        Args:
            cols: [int] target width, a synonym for width.  Define only one of the two.
            rows: [int] target height, a synonym for height.  Define only one of the two.
            height: [int] target height
            width: [int] target width
            interp: [str] interpolation method forwarded to the parent rescale/resize

        Returns:
            The resized scene.  If only one of the width or the height is given, the scale is isotropic.
        """
        assert cols is None or width is None, "Define either width or cols"
        assert rows is None or height is None, "Define either height or rows"
        if height is not None:
            rows = height
        if width is not None:
            cols = width
        assert cols is not None or rows is not None, "Invalid input"

        # Per axis scale factors, a missing axis takes the scale of the other axis
        sx = (float(cols) / self.width()) if cols is not None else None
        sy = (float(rows) / self.height()) if rows is not None else None
        if sx is None:
            sx = sy
        if sy is None:
            sy = sx
        self._objectlist = [obj.scale_x(sx).scale_y(sy) for obj in self._objectlist]
        self._history('scale_x', s=1/sx)._history('scale_y', s=1/sy)
        if sx == sy:
            self = super().rescale(sx, interp=interp)  # FIXME: if we call resize here, inheritance is screweed up
        else:
            self = super().resize(cols, rows, interp=interp)
        return self

    def centersquare(self):
        """Center-crop the (H,W) image to (min(H,W), min(H,W)) and shift all objects by the removed margin"""
        (H, W) = self.shape()
        self = super().centersquare()
        dy = (H - self.height())/2.0
        dx = (W - self.width())/2.0
        shifted = []
        for obj in self._objectlist:
            shifted.append(obj.translate(-dx, -dy))
        self._objectlist = shifted
        self._history('translate', dx=dx, dy=dy)  # inverse shift, replayed by flush()
        return self
    
    def fliplr(self):
        """Mirror the image buffer and all objects left-to-right (about the vertical axis)"""
        mirrored = []
        for obj in self._objectlist:
            mirrored.append(obj.fliplr(self.numpy()))
        self._objectlist = mirrored
        self._history('fliplr', width=self.width())  # recorded so that flush() can undo the mirror
        self = super().fliplr()
        return self

    def flipud(self):
        """Mirror the image buffer and all objects top-to-bottom (about the horizontal axis)"""
        mirrored = []
        for obj in self._objectlist:
            mirrored.append(obj.flipud(self.numpy()))
        self._objectlist = mirrored
        self._history('flipud', height=self.height())  # recorded so that flush() can undo the mirror
        self = super().flipud()
        return self
    
    def dilate(self, s):
        """Dilate every object by scale factor s; the image buffer is untouched, so dilated boxes may extend outside the image rectangle"""
        dilated = []
        for obj in self._objectlist:
            dilated.append(obj.dilate(s))
        self._objectlist = dilated
        return self

    def zeropad(self, padwidth, padheight):
        """Zero pad the image, then shift all objects by the leading pad.

        padwidth/padheight are forwarded unchanged to the parent zeropad().  When either is a 2-tuple,
        its first element is used as the object offset; otherwise the value itself is used.
        """
        self = super().zeropad(padwidth, padheight)
        if isinstance(padwidth, tuple) and len(padwidth) == 2:
            dx = padwidth[0]
        else:
            dx = padwidth
        if isinstance(padheight, tuple) and len(padheight) == 2:
            dy = padheight[0]
        else:
            dy = padheight
        self._objectlist = [obj.translate(dx, dy) for obj in self._objectlist]
        self._history('translate', dx=-dx, dy=-dy)  # inverse shift, replayed by flush()
        return self

    def meanpad(self, padwidth, padheight, mu=None):
        """Mean pad the image (mu is forwarded to the parent meanpad()), then shift all objects by the leading pad.

        When padwidth or padheight is a 2-tuple, its first element is used as the object offset;
        otherwise the value itself is used.
        """
        self = super().meanpad(padwidth, padheight, mu=mu)
        if isinstance(padwidth, tuple) and len(padwidth) == 2:
            dx = padwidth[0]
        else:
            dx = padwidth
        if isinstance(padheight, tuple) and len(padheight) == 2:
            dy = padheight[0]
        else:
            dy = padheight
        self._objectlist = [obj.translate(dx, dy) for obj in self._objectlist]
        self._history('translate', dx=-dx, dy=-dy)  # inverse shift, replayed by flush()
        return self

    def rot90cw(self):
        """Rotate the image buffer 90 degrees clockwise and rotate every object to match"""
        (rows, cols) = self.shape()
        rotated = np.rot90(self.numpy(), 3)  # three counterclockwise quarter turns
        self.array(rotated)
        self._objectlist = [obj.rot90cw(rows, cols) for obj in self._objectlist]
        self._history('rot90ccw', H=cols, W=rows)  # inverse rotation on the swapped shape
        return self

    def rot90ccw(self):
        """Rotate the image buffer 90 degrees counterclockwise and rotate every object to match"""
        (rows, cols) = self.shape()
        rotated = np.rot90(self.numpy(), 1)  # one counterclockwise quarter turn
        self.array(rotated)
        self._objectlist = [obj.rot90ccw(rows, cols) for obj in self._objectlist]
        self._history('rot90cw', H=cols, W=rows)  # inverse rotation on the swapped shape
        return self

    def maxdim(self, dim=None, interp='bilinear'):
        """With dim=None return the largest image dimension; otherwise resize, preserving aspect ratio, so the largest dimension equals dim"""
        if dim is None:
            return max(self.shape())
        return super().maxdim(dim, interp=interp)  # will call self.rescale() which will update boxes

    def mindim(self, dim=None, interp='bilinear'):
        """With dim=None return the smallest image dimension; otherwise resize, preserving aspect ratio, so the smallest dimension equals dim"""
        if dim is None:
            return min(self.shape())
        return super().mindim(dim, interp=interp)  # will call self.rescale() which will update boxes

    def crop(self, bbox=None):
        """Crop the image buffer with the given bounding box and shift all objects into the cropped frame.

        If bbox is None the scene must contain exactly one object, and the first
        `vipy.geometry.BoundingBox` in the object list is used as the crop box.
        """
        assert bbox is not None or (len(self) == 1), "Bounding box must be provided if number of objects != 1"
        if bbox is None:
            bbox = [o for o in self._objectlist if isinstance(o, vipy.geometry.BoundingBox)][0]
        self = super()._crop(bbox)
        dx = bbox.xmin()
        dy = bbox.ymin()
        shifted = []
        for obj in self._objectlist:
            shifted.append(obj.translate(-dx, -dy))
        self._objectlist = shifted
        self._history('translate', dx=dx, dy=dy)  # inverse shift, replayed by flush()
        return self

    def objectcrop(self, dilate=1.0):
        """Pad-crop the image to the dilated `vipy.image.Scene.boundingbox`, or return self unchanged if there is no bounding box"""
        bb = self.boundingbox()
        if bb is None:
            return self
        return self.padcrop(bb.dilate(dilate))

    def objectsquare(self, dilate=1.0):
        """Pad-crop the image to the dilated, max-squared `vipy.image.Scene.boundingbox`, or return self unchanged if there is no bounding box"""
        bb = self.boundingbox()
        if bb is None:
            return self
        square = bb.dilate(dilate).maxsquare()
        return self.padcrop(square)
    
    def centercrop(self, height, width):
        """Crop a (height x width) box centered on the image center"""
        center_x = float(self.width() / 2.0)
        center_y = float(self.height() / 2.0)
        box = BoundingBox(xcentroid=center_x, ycentroid=center_y, width=int(width), height=int(height))
        return self.crop(box)

    def cornercrop(self, height, width):
        """Crop a (height x width) box anchored at the upper left corner via `crop`, returning valid pixels only"""
        box = BoundingBox(xmin=0, ymin=0, width=int(width), height=int(height))
        return self.crop(box)
    
    def padcrop(self, bbox):
        """Crop the image buffer with the given box, zero padding first when the box extends past the image rectangle, then shift all objects into the cropped frame"""
        bbox = bbox.clone()  # work on a copy so the caller's box is not modified
        dx = int(max(0, 0-bbox.xmin(), bbox.xmax()-self.width()))   # horizontal overhang, if any
        dy = int(max(0, 0-bbox.ymin(), bbox.ymax()-self.height()))  # vertical overhang, if any
        padded = self.zeropad(dx, dy)
        padded._crop(bbox.translate(dx=dx, dy=dy))
        dx = bbox.xmin()
        dy = bbox.ymin()
        self._objectlist = [obj.translate(-dx, -dy) for obj in self._objectlist] # after crop
        self._history('translate', dx=dx, dy=dy)
        return self

    def cornerpadcrop(self, height, width):
        """Crop a (height x width) box anchored at the upper left corner via `padcrop`, zero padding the result out to (height, width)"""
        box = BoundingBox(xmin=0, ymin=0, width=width, height=height)
        return self.padcrop(box)
    
    # Image export
    def rectangular_mask(self, W=None, H=None):
        """Return a binary mask with ones inside all objects.

        Args:
            W: [int] mask width.  Provide together with H to avoid loading the image.
            H: [int] mask height.  Provide together with W to avoid loading the image.

        Returns:
            A uint8 numpy array of shape (H, W).  If either W or H is None, the rounded image
            height and width are used.  Bounding boxes are filled as rectangles, and
            `vipy.geometry.Point2d` objects are filled using `vipy.calibration.circle`.
        """
        if (W is None or H is None):
            (H, W) = (int(np.round(self.height())),
                      int(np.round(self.width())))
        immask = np.zeros((H, W)).astype(np.uint8)
        for o in self._objectlist:
            if isinstance(o, vipy.geometry.BoundingBox) and o.hasoverlap(immask):
                # Clip against the mask itself (not self.numpy()), so that an explicit (W,H)
                # neither forces an image load nor clips to a differently sized rectangle
                bbm = o.clone().imclip(immask).int()
                immask[bbm.ymin():bbm.ymax(), bbm.xmin():bbm.xmax()] = 1
            if isinstance(o, vipy.geometry.Point2d) and o.boundingbox().hasoverlap(immask):
                mask = vipy.calibration.circle(o.x, o.y, o.r, W, H)
                immask[mask>0] = 1
        return immask

    def binarymask(self):
        """Overwrite the image buffer in place with `rectangular_mask` (broadcast over channels for multi-channel images)"""
        single_channel = self.channels() == 1
        mask = self.rectangular_mask()
        if not single_channel:
            mask = np.expand_dims(mask, axis=2)  # trailing axis so the mask broadcasts over channels
        img = self.numpy()
        img[:] = mask  # in-place update
        return self
        
    def bgmask(self):
        """Zero, in place, every pixel that lies outside `rectangular_mask`"""
        single_channel = self.channels() == 1
        mask = self.rectangular_mask()
        if not single_channel:
            mask = np.expand_dims(mask, axis=2)  # trailing axis so the mask broadcasts over channels
        img = self.numpy()
        img[:] = np.multiply(img, mask)  # in-place update
        return self

    def fgmask(self):
        """Zero, in place, every pixel that lies inside `rectangular_mask`"""
        single_channel = self.channels() == 1
        mask = self.rectangular_mask()
        if not single_channel:
            mask = np.expand_dims(mask, axis=2)  # trailing axis so the mask broadcasts over channels
        img = self.numpy()
        background = 1.0-mask
        img[:] = np.multiply(img, background)  # in-place update
        return self
    
    def pixelmask(self, pixelsize=8):
        """Pixelate, in place, the pixels inside `rectangular_mask` (e.g. like privacy glass).

        A clone is downscaled by 1/pixelsize and resized back to this image with nearest
        neighbor interpolation; its pixels replace the masked pixels of this image.
        """
        assert pixelsize > 1, "Pixelsize is a scale factor such that pixels within the foreground are pixelsize times larger than the background"
        (img, mask) = (self.numpy(), self.rectangular_mask())  # force writeable
        coarse = self.clone().rescale(1.0/pixelsize, interp='nearest').resize_like(self, interp='nearest').numpy()
        inside = mask > 0
        img[inside] = coarse[inside]  # in-place update
        return self

    def pixelize(self, radius=16):
        """Alias for `pixelmask`, with radius passed as the pixelsize"""
        pixelsize = radius
        return self.pixelmask(pixelsize=pixelsize)
    def pixelate(self, radius=16):
        """Alias for `pixelmask`, with radius passed as the pixelsize"""
        pixelsize = radius
        return self.pixelmask(pixelsize=pixelsize)
        
    
    def blurmask(self, radius=7):
        """Replace, in place, the pixels inside `rectangular_mask` with the same pixels from a blurred clone"""
        (img, mask) = (self.numpy(), self.rectangular_mask())  # force writeable
        blurred = self.clone().blur(radius).numpy()
        inside = mask > 0
        img[inside] = blurred[inside]  # in-place update
        return self

    def blurmask_only(self, categories, radius=7):
        """Blur only the foreground objects whose category is in the given categories.

        Args:
            categories: container of category names; objects with o.category() in categories are blurred
            radius: blur radius passed to `blurmask`, must be greater than one

        Returns:
            A clone of this scene with the selected objects blurred via `blurmask`, and with the
            complete original object list restored afterwards
        """
        assert radius > 1, "Blur radius must be greater than one"

        objects = self.objects()
        scene = self.clone()
        selected = [o for o in objects if o.category() in categories]
        return scene.objects(selected).blurmask(radius=radius).objects(objects)
    
    def replace(self, newim, broadcast=False):
        """Overwrite the pixels inside every object box with pixels from newim.

        Each object is clipped (in place) to both image rectangles first.  With broadcast=False the
        same region of newim is copied; with broadcast=True a clone of newim is resized to the box
        size and copied into the box.
        """
        assert isinstance(newim, vipy.image.Image), "Invalid replacement image - Must be vipy.image.Image"
        img = self.numpy()
        newimg = newim.array()
        for d in self._objectlist:
            d.imclip(newimg).imclip(img)
            rows = slice(int(d.ymin()), int(d.ymax()))
            cols = slice(int(d.xmin()), int(d.xmax()))
            if broadcast:
                patch = newim.clone().resize(int(d.width()), int(d.height())).array()
            else:
                patch = newimg[rows, cols]
            img[rows, cols] = patch
        return self
    
    def meanmask(self):
        """Set, in place, the pixels inside `rectangular_mask` to the value returned by meanchannel()"""
        img = self.numpy()  # force writeable
        fill = self.meanchannel()
        inside = self.rectangular_mask() > 0
        img[inside] = fill  # in-place update
        return self

    
    def perceptualhash(self, bits=128, asbinary=False, asbytes=False, objmask=False):
        """Perceptual differential hash function.

        A clone of the image is (optionally) mean-masked, resized to a small square, converted to greyscale,
        and the signs of the two gradient arrays (one per image axis) are packed into the hash.

        Args:
            bits: [int]  hash length, must be 2*k*k for k in [2,16].  Longer hashes have lower TAR (true accept rate, some near dupes are missed) but lower FAR (false accept rate); shorter hashes have higher TAR but higher FAR.
            objmask: [bool] if true, replace the foreground objects with the mean color (`meanmask`) prior to computing
            asbinary: [bool] if true, return a binary numpy array
            asbytes: [bool] if true, return a bytes array (takes precedence over asbinary)

        Returns:
            By default a hex string encoding the packed hash, such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
            asbytes: the packed hash as bytes
            asbinary: the unpacked boolean numpy array

        .. notes::
            - Can be used for near duplicate detection by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.
            - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex(h)), which is equivalent to perceptualhash(asbinary=True)

        """
        allowablebits = [2*k*k for k in range(2, 17)]
        assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
        sq = int(np.ceil(np.sqrt(bits/2.0)))
        im = self.clone().meanmask() if objmask else self.clone()
        small = im.resize(cols=sq+1, rows=sq+1).greyscale().numpy()
        gradients = np.dstack(np.gradient(small))[0:-1, 0:-1]  # drop last row/col: sq x sq x 2 signs
        b = (gradients > 0).flatten()
        if asbytes:
            return bytes(np.packbits(b))
        if asbinary:
            return b
        return bytes(np.packbits(b)).hex()

    def fghash(self, bits=8, asbinary=False, asbytes=False):
        """Perceptual differential hash of each element yielded by iterating this scene, computed on its crop() with objmask=False"""
        hashes = []
        for im in self:
            hashes.append(im.crop().perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=False))
        return hashes

    
    def bghash(self, bits=128, asbinary=False, asbytes=False):
        """Perceptual differential hash of a greyscale clone, with foreground objects masked out (objmask=True)"""
        grey = self.clone().greyscale()
        return grey.perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=True)
        
    def isduplicate(self, im, threshold, bits=128):
        """Background hash near duplicate detection: true if the perceptual hash distance between the bghash of self and of im is strictly below threshold"""
        assert isinstance(im, Image), "Invalid input"
        distance = vipy.image.Image.perceptualhash_distance(self.bghash(bits=bits), im.bghash(bits=bits))
        return distance < threshold
    
        
    def show(self, categories=None, figure=1, nocaption=False, nocaption_withstring=[], fontsize=10, boxalpha=0.15, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, captionoffset=(3,-18), nowindow=False, shortlabel=None, timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Show the scene: the image with its objects overlaid

        Args:
           - categories: [list]  List of category names in the scene to show
           - fontsize: [int] or [str]: Size of the font, fontsize=int for points, fontsize='NN:scaled' to scale the font relative to the image size (NN * min(image shape)/640)
           - figure: [int|str] Figure number or title, show the image in the provided figure=int numbered window
           - nocaption: [bool]  Show or do not show the text caption in the upper left of the box 
           - nocaption_withstring: [list]:  Do not show captions for those object categories containing any of the strings in the provided list
           - boxalpha (float, [0,1]):  Set the text box background to be semi-transparent with an alpha
           - d_category2color (dict):  Define a dictionary of required mapping of specific category() to box colors.  Non-specified categories of detections and keypoints are assigned a color selected by a hash of the category name
           - captionoffset (int, int): The relative position of the caption to the upper right corner of the box.
           - nowindow (bool):  Display or not display the image, used by `vipy.image.Scene.annotation`
           - shortlabel (dict):  An optional dictionary mapping category names to short names easier to display 
           - timestamp: Optional timestamp forwarded to `vipy.show.imobjects`
           - mutator (lambda):  A lambda function with signature lambda im: f(im) which will modify a clone of this image prior to show.  Useful for changing labels on the fly
           - timestampoffset (tuple): (x,y) coordinate offsets to shift the upper left corner timestamp
           - theme [str]: If 'dark' use dark mode, if 'light' use light mode to visualize captions with high contrast dark or light foregrounds 

        Returns:
            This object (self); the display is performed on a clone
        """
        colors = vipy.show.colorlist(theme)
        all_colors = vipy.show.colorlist()        
        textfacecolor = 'black' if theme=='dark' else 'white'
        timestampcolor = 'white' if theme=='dark'  else 'black'
        timestampfacecolor = 'black' if theme=='dark' else 'white'        
        textfacealpha = 0.8 if theme=='dark' else 0.85
        
        im = self.clone() if not mutator else mutator(self.clone())
        imdisplay = im.rgb() if im.colorspace() != 'rgb' else im.load()  # convert to RGB for show() if necessary
        
        valid_objects = [obj.clone() for obj in imdisplay.objects() if categories is None or obj.category() in tolist(categories)]  # Objects with valid category
        # NOTE(review): clipping is done against self.numpy(), not imdisplay; this differs only if mutator changes the image size - confirm intended
        valid_objects = [obj.imclip(self.numpy()) for obj in valid_objects if obj.hasoverlap(self.numpy())]  # Objects within image rectangle
        valid_objects = [obj.new_category(shortlabel[obj.category()]) for obj in valid_objects] if shortlabel else valid_objects  # Display name as shortlabel?
        # Colors are selected by a SHA1 hash of the category name: theme colors for detections, all colors for keypoints
        d_det_category_to_color = {d.category():colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(colors)] for d in valid_objects if isinstance(d, vipy.object.Detection)}
        d_kp_category_to_color = {d.category():all_colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(all_colors)] for d in valid_objects if isinstance(d, vipy.object.Keypoint2d)}        
        d_category_to_color = mergedict(d_kp_category_to_color, d_det_category_to_color, d_category2color)
        
        # NOTE(review): raises KeyError for an object that is neither Detection nor Keypoint2d and whose category is not in d_category2color - TODO confirm this cannot occur
        object_color = [d_category_to_color[d.category()] for d in valid_objects]                
        valid_objects  = [d if not any([c in d.category() for c in tolist(nocaption_withstring)]) else d.nocategory() for d in valid_objects]  # Objects requested to show without caption

        fontsize_scaled = float(fontsize.split(':')[0])*(min(imdisplay.shape())/640.0) if isstring(fontsize) else fontsize
        vipy.show.imobjects(imdisplay._array, valid_objects, bordercolor=object_color, textcolor=object_color, fignum=figure, do_caption=(nocaption==False), facealpha=boxalpha, fontsize=fontsize_scaled,
                            captionoffset=captionoffset, nowindow=nowindow, textfacecolor=textfacecolor, textfacealpha=textfacealpha, timestamp=timestamp,
                            timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestampoffset=timestampoffset)
        return self

    def annotate(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Alias for `vipy.image.Scene.savefig`, all arguments are forwarded by keyword"""
        forwarded = dict(outfile=outfile,
                         categories=categories,
                         figure=figure,
                         nocaption=nocaption,
                         fontsize=fontsize,
                         boxalpha=boxalpha,
                         d_category2color=d_category2color,
                         captionoffset=captionoffset,
                         dpi=dpi,
                         shortlabel=shortlabel,
                         nocaption_withstring=nocaption_withstring,
                         timestamp=timestamp,
                         theme=theme,
                         timestampoffset=timestampoffset,
                         mutator=mutator)
        return self.savefig(**forwarded)

    def savefig(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, textfacecolor='white', shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
        """Save `vipy.image.Scene.show` output to the given file, or return it as an image, without popping up a window.

        Args:
            outfile: [str] Path of the figure file to write.  If None, the rendered figure is returned as an image instead
            figure: [int] Figure number to render into.  If None, figure 1 is used, and in the buffer case the current figure is closed afterwards
            dpi: [int] Resolution passed to `vipy.show.savefig` when writing outfile
            textfacecolor: Not used by this method, retained so that existing callers keep working
            categories, nocaption, fontsize, boxalpha, d_category2color, captionoffset, shortlabel, nocaption_withstring, timestamp, mutator, timestampoffset, theme: Forwarded unchanged to `vipy.image.Scene.show`

        Returns:
            outfile if it was provided, otherwise an RGB `vipy.image.Image` of the rendered figure
        """
        fignum = figure if figure is not None else 1        
        self.show(categories=categories, figure=fignum, nocaption=nocaption, fontsize=fontsize, boxalpha=boxalpha, 
                  d_category2color=d_category2color, captionoffset=captionoffset, nowindow=True, 
                  shortlabel=shortlabel, nocaption_withstring=nocaption_withstring, timestamp=timestamp,
                  mutator=mutator, timestampoffset=timestampoffset, theme=theme)
        
        if outfile is None:
            buf = io.BytesIO()
            (W,H) = plt.figure(num=fignum).canvas.get_width_height()  # fast(ish)
            plt.figure(num=fignum).canvas.print_raw(buf)  # fast(ish), FIXME: there is a bug here with captions showing behind bboxes on macos
            img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))
            if figure is None:
                vipy.show.close(plt.gcf().number)   # memory cleanup (useful for video annotation on last frame)
            return vipy.image.Image(array=img, colorspace='rgba').rgb()
        else:
            # Save the figure that show() rendered into (fignum), which differs from figure only when figure=None
            vipy.show.savefig(os.path.abspath(os.path.expanduser(outfile)), fignum, dpi=dpi, bbox_inches='tight', pad_inches=0)
            return outfile

Methods

def add_object(self, imdet)

Alias for append

Expand source code Browse git

def add_object(self, imdet):
    """Alias for `append_object`"""
    appended = self.append_object(imdet)
    return appended

def annotate(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person': 'green', 'vehicle': 'blue', 'object': 'red'}, captionoffset=(3, -18), dpi=200, shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0, 0), theme='dark')

Alias for `vipy.image.Scene.savefig`

Expand source code Browse git

def annotate(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
    """Alias for `vipy.image.Scene.savefig`, all arguments are forwarded by keyword"""
    forwarded = dict(outfile=outfile,
                     categories=categories,
                     figure=figure,
                     nocaption=nocaption,
                     fontsize=fontsize,
                     boxalpha=boxalpha,
                     d_category2color=d_category2color,
                     captionoffset=captionoffset,
                     dpi=dpi,
                     shortlabel=shortlabel,
                     nocaption_withstring=nocaption_withstring,
                     timestamp=timestamp,
                     theme=theme,
                     timestampoffset=timestampoffset,
                     mutator=mutator)
    return self.savefig(**forwarded)

def append_object(self, imdet)

Append the provided vipy.object.Detection object to the scene object list

Expand source code Browse git

def append_object(self, imdet):
    """Add the provided object (must be a vipy.object.Object) to the end of the scene object list"""
    assert isinstance(imdet, vipy.object.Object), "Invalid input"
    objects = self._objectlist
    objects.append(imdet)  # in-place append on the scene's own list
    return self

def bghash(self, bits=128, asbinary=False, asbytes=False)

Perceptual differential hash function, masking out foreground regions

Expand source code Browse git

def bghash(self, bits=128, asbinary=False, asbytes=False):
    """Perceptual differential hash of a greyscale clone, with foreground objects masked out (objmask=True)"""
    grey = self.clone().greyscale()
    return grey.perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=True)

def bgmask(self)

Set all pixels outside object bounding boxes to zero

Expand source code Browse git

def bgmask(self):
    """Zero, in place, every pixel that lies outside `rectangular_mask`"""
    single_channel = self.channels() == 1
    mask = self.rectangular_mask()
    if not single_channel:
        mask = np.expand_dims(mask, axis=2)  # trailing axis so the mask broadcasts over channels
    img = self.numpy()
    img[:] = np.multiply(img, mask)  # in-place update
    return self

def binarymask(self)

Alias for rectangular_mask with in-place update

Expand source code Browse git

def binarymask(self):
    """Overwrite the image buffer in place with `rectangular_mask` (broadcast over channels for multi-channel images)"""
    single_channel = self.channels() == 1
    mask = self.rectangular_mask()
    if not single_channel:
        mask = np.expand_dims(mask, axis=2)  # trailing axis so the mask broadcasts over channels
    img = self.numpy()
    img[:] = mask  # in-place update
    return self

def blurmask(self, radius=7)

Replace pixels within all foreground objects with a privacy preserving blurred foreground

Expand source code Browse git

def blurmask(self, radius=7):
    """Replace, in place, the pixels inside `rectangular_mask` with the same pixels from a blurred clone"""
    (img, mask) = (self.numpy(), self.rectangular_mask())  # force writeable
    blurred = self.clone().blur(radius).numpy()
    inside = mask > 0
    img[inside] = blurred[inside]  # in-place update
    return self

def blurmask_only(self, categories, radius=7)

Replace pixels within all foreground objects with specified category with a privacy preserving blurred foreground

Expand source code Browse git

def blurmask_only(self, categories, radius=7):
    """Blur only the foreground objects whose category is in the given categories.

    Args:
        categories: container of category names; objects with o.category() in categories are blurred
        radius: blur radius passed to `blurmask`, must be greater than one

    Returns:
        A clone of this scene with the selected objects blurred via `blurmask`, and with the
        complete original object list restored afterwards
    """
    assert radius > 1, "Blur radius must be greater than one"

    objects = self.objects()
    scene = self.clone()
    selected = [o for o in objects if o.category() in categories]
    return scene.objects(selected).blurmask(radius=radius).objects(objects)

def boundingbox(self)

The boundingbox of a scene is the union of all object bounding boxes, or None if there are no objects. Load to compensate for normalized coordinates

Expand source code Browse git

def boundingbox(self):
    """Return the union of all object bounding boxes (as a clone), or None if the scene has no objects.  The image is loaded first to compensate for normalized coordinates"""
    boxes = [vipy.geometry.BoundingBox.cast(obj) for obj in self.load().objects()]
    if len(boxes) < 1:
        return None
    first = boxes[0].clone()  # clone so the first object's own box is not handed out
    if len(boxes) >= 2:
        return first.union(boxes[1:])
    return first

def centersquare(self)

Crop the image of size (H,W) to be centersquare (min(H,W), min(H,W)) preserving center, and update bounding boxes

Expand source code Browse git

def centersquare(self):
    """Center-crop the (H,W) image to (min(H,W), min(H,W)) and shift all objects by the removed margin"""
    (H, W) = self.shape()
    self = super().centersquare()
    dy = (H - self.height())/2.0
    dx = (W - self.width())/2.0
    shifted = []
    for obj in self._objectlist:
        shifted.append(obj.translate(-dx, -dy))
    self._objectlist = shifted
    self._history('translate', dx=dx, dy=dy)  # inverse shift, replayed by flush()
    return self

def clear(self)

Remove all objects from this scene.

Expand source code Browse git

def clear(self):
    """Replace the scene object list with an empty list via objects([])"""
    no_objects = []
    return self.objects(no_objects)

def cornercrop(self, height, width)

Crop image of size (height x width) from the upper left corner, returning valid pixels only

Expand source code Browse git

def cornercrop(self, height, width):
    """Crop a (height x width) box anchored at the upper left corner via `crop`, returning valid pixels only"""
    box = BoundingBox(xmin=0, ymin=0, width=int(width), height=int(height))
    return self.crop(box)

def cornerpadcrop(self, height, width)

Crop image of size (height x width) from the upper left corner, returning zero padded result out to (height, width)

Expand source code Browse git

def cornerpadcrop(self, height, width):
    """Crop a (height x width) box anchored at the upper left corner via `padcrop`, zero padding the result out to (height, width)"""
    box = BoundingBox(xmin=0, ymin=0, width=width, height=height)
    return self.padcrop(box)

def crop(self, bbox=None)

Crop the image buffer using the supplied bounding box object (or the only object if bbox=None), clipping the box to the image rectangle, update all scene objects

Expand source code Browse git

def crop(self, bbox=None):
    """Crop the image buffer with the given bounding box and shift all objects into the cropped frame.

    If bbox is None the scene must contain exactly one object, and the first
    `vipy.geometry.BoundingBox` in the object list is used as the crop box.
    """
    assert bbox is not None or (len(self) == 1), "Bounding box must be provided if number of objects != 1"
    if bbox is None:
        bbox = [o for o in self._objectlist if isinstance(o, vipy.geometry.BoundingBox)][0]
    self = super()._crop(bbox)
    dx = bbox.xmin()
    dy = bbox.ymin()
    shifted = []
    for obj in self._objectlist:
        shifted.append(obj.translate(-dx, -dy))
    self._objectlist = shifted
    self._history('translate', dx=dx, dy=dy)  # inverse shift, replayed by flush()
    return self

def difference(self, other, miniou)

Return a Scene() containing the objects in self but not other, that overlap by miniou with greedy assignment

Expand source code Browse git

def difference(self, other, miniou):
    """Return a clone of this Scene keeping only the objects that greedy assignment (by category, at the given miniou) leaves unmatched against the objects of other"""
    assert isinstance(other, Scene), "Invalid input"
    v = self.clone()
    assignment = greedy_assignment(self.objects(), other.objects(), miniou, bycategory=True)
    unmatched = [k for (k, d) in enumerate(assignment) if d is None]
    v._objectlist = [v._objectlist[k] for k in unmatched]
    return v

def dilate(self, s)

Dilate all bounding boxes by scale factor, dilated boxes may be outside image rectangle

Expand source code Browse git

def dilate(self, s):
    """Dilate every object by scale factor s; the image buffer is untouched, so dilated boxes may extend outside the image rectangle"""
    dilated = []
    for obj in self._objectlist:
        dilated.append(obj.dilate(s))
    self._objectlist = dilated
    return self

def fghash(self, bits=8, asbinary=False, asbytes=False)

Perceptual differential hash function, computed for each foreground region independently

Expand source code Browse git

def fghash(self, bits=8, asbinary=False, asbytes=False):
    """Perceptual differential hash of each element yielded by iterating this scene, computed on its crop() with objmask=False"""
    hashes = []
    for im in self:
        hashes.append(im.crop().perceptualhash(bits=bits, asbinary=asbinary, asbytes=asbytes, objmask=False))
    return hashes

def fgmask(self)

Set all pixels inside object bounding boxes to zero

Expand source code Browse git

def fgmask(self):
    """Zero, in place, every pixel that lies inside `rectangular_mask`"""
    single_channel = self.channels() == 1
    mask = self.rectangular_mask()
    if not single_channel:
        mask = np.expand_dims(mask, axis=2)  # trailing axis so the mask broadcasts over channels
    img = self.numpy()
    background = 1.0-mask
    img[:] = np.multiply(img, background)  # in-place update
    return self

def fliplr(self)

Mirror buffer and all bounding box around vertical axis

Expand source code Browse git

def fliplr(self):
    """Mirror the image buffer and all objects left-to-right (about the vertical axis)"""
    mirrored = []
    for obj in self._objectlist:
        mirrored.append(obj.fliplr(self.numpy()))
    self._objectlist = mirrored
    self._history('fliplr', width=self.width())  # recorded so that flush() can undo the mirror
    self = super().fliplr()
    return self

def flipud(self)

Mirror buffer and all bounding boxes top-to-bottom (around the horizontal axis)

Expand source code Browse git

def flipud(self):
    """Mirror the image buffer and all objects top-to-bottom (about the horizontal axis)"""
    mirrored = []
    for obj in self._objectlist:
        mirrored.append(obj.flipud(self.numpy()))
    self._objectlist = mirrored
    self._history('flipud', height=self.height())  # recorded so that flush() can undo the mirror
    self = super().flipud()
    return self

def flush(self, undo_history=True)

Free the image buffer, and undo all of the object transformations to restore alignment with the reference image filename/url

Expand source code Browse git

def flush(self, undo_history=True):
    """Free the image buffer, and undo all of the object transformations to restore alignment with the reference image filename/url

    Args:
        undo_history: [bool] if true and a history is present, every recorded (method name, keyword arguments) entry is applied to each object, most recent first

    Returns:
        The result of the parent class flush()
    """
    if undo_history:
        recorded = self._history()
        if recorded is not None:
            for (method, kwargs) in reversed(recorded):
                # objectmap applies the lambda immediately, so closing over the loop variables is safe
                self.objectmap(lambda o: getattr(o, method)(**kwargs))  # undo
    self.delattribute('_history')
    return super().flush()

def flush_array(self)

Expand source code Browse git

def flush_array(self):
    """Call flush() with undo_history=False, so the recorded object transformations are not undone"""
    flushed = self.flush(undo_history=False)
    return flushed

def image_tags(self, tags=None)

Return the image level tags of the scene

Expand source code Browse git

def image_tags(self, tags=None):
    """Return the image level tags of the scene, by delegating to the parent class tags() with the same argument"""
    image_level = super().tags(tags)
    return image_level

def imclip(self)

Clip all bounding boxes to the image rectangle, silently rejecting those boxes that are degenerate or outside the image

Expand source code Browse git

def imclip(self):
    """Clip all objects to the image rectangle, silently dropping those that do not overlap the image

    Returns:
        This scene, for chaining
    """
    kept = []
    for obj in self._objectlist:
        if not obj.hasoverlap(self.numpy()):
            continue  # no overlap with the image: rejected without error
        kept.append(obj.imclip(self.numpy()))
    self._objectlist = kept
    return self

def intersection(self, other, miniou, bycategory=True)

Return a Scene() containing the objects in both self and other, that overlap by miniou with greedy assignment

Expand source code Browse git

def intersection(self, other, miniou, bycategory=True):
    """Return a clone of this scene that keeps only the objects greedily assigned to an object in other.

    Args:
        other: a Scene to compare against
        miniou: overlap threshold forwarded to greedy_assignment
        bycategory: [bool] forwarded to greedy_assignment

    Returns:
        A clone of self whose object list holds only the objects of self with a (non-None) assignment in other
    """
    assert isinstance(other, Scene), "Invalid input"
    result = self.clone()
    assignment = greedy_assignment(result.objects(), other.objects(), miniou, bycategory=bycategory)
    matched = []
    for (k, match) in enumerate(assignment):
        if match is not None:
            matched.append(result._objectlist[k])
    result._objectlist = matched
    return result

def isduplicate(self, im, threshold, bits=128)

Background hash near duplicate detection, returns true if self and im are near duplicate images using bghash

Expand source code Browse git

def isduplicate(self, im, threshold, bits=128):
    """Background hash near duplicate detection.

    Args:
        im: the image to compare against
        threshold: the hash distance strictly below which the two images are declared near duplicates
        bits: [int] forwarded to bghash() for both images

    Returns:
        True if the perceptual hash distance between self.bghash() and im.bghash() is less than threshold
    """
    assert isinstance(im, Image), "Invalid input"
    distance = vipy.image.Image.perceptualhash_distance(self.bghash(bits=bits), im.bghash(bits=bits))
    return distance < threshold

def json(self, encode=True)

Expand source code Browse git

def json(self, encode=True):
    """Serialize the scene to JSON.

    Args:
        encode: [bool] if true return a JSON encoded string, otherwise return the dictionary

    Returns:
        A dictionary (or its JSON string) keyed by the Scene slot names with the leading "_" removed.
        Slots that are None are omitted, and objects are grouped under 'objectlist' by type ('Detection', 'Keypoint2d').
    """
    d = {}
    for slot in Scene.__slots__:
        if getattr(self, slot) is not None:
            d[slot.lstrip('_')] = getattr(self, slot)  # prettyjson (remove "_" prefix to attributes)

    detections = [bb.json(encode=False) for bb in self._objectlist if isinstance(bb, vipy.object.Detection)]
    keypoints = [p.json(encode=False) for p in self._objectlist if isinstance(p, vipy.object.Keypoint2d)]
    d['objectlist'] = {}
    if len(detections) > 0:  # empty groups are left out
        d['objectlist']['Detection'] = detections
    if len(keypoints) > 0:
        d['objectlist']['Keypoint2d'] = keypoints

    if 'attributes' in d and len(d['attributes']) == 0:  # cleanup empty attributes
        del d['attributes']  # will be recreated in from_json
    if d.get('array') is not None:
        if self.hasfilename() or self.hasurl():
            log.warning('serializing pixel array to json is inefficient for large images.  Try self.flush() or self.save(), then reload the image from backing filename/url after json import')
        d['array'] = self._array.tolist()

    if encode:
        return json.dumps(d)
    return d

def maxdim(self, dim=None, interp='bilinear')

Resize scene preserving aspect ratio so that maximum dimension of image = dim, update all objects

Expand source code Browse git

def maxdim(self, dim=None, interp='bilinear'):
    """Resize scene preserving aspect ratio so that maximum dimension of image = dim, update all objects.

    Returns:
        The current maximum of shape() if dim is None, otherwise the result of the parent class maxdim()
    """
    if dim is None:
        return max(self.shape())
    # Original note: the parent implementation will call self.rescale(), which will update boxes
    return super().maxdim(dim, interp=interp)

def meanmask(self)

Replace pixels within the foreground objects with the mean pixel color

Expand source code Browse git

def meanmask(self):
    """Replace, in place, the pixels covered by the rectangular object mask with the value of meanchannel()

    Returns:
        This scene, for chaining
    """
    pixels = self.numpy()  # force writeable
    fill = self.meanchannel()
    foreground = self.rectangular_mask() > 0
    pixels[foreground] = fill  # in-place update
    return self

def meanpad(self, padwidth, padheight, mu=None)

Mean pad (image color mean) image with padwidth cols before and after and padheight rows before and after, then update bounding box offsets

Expand source code Browse git

def meanpad(self, padwidth, padheight, mu=None):
    """Mean pad the image via the parent class meanpad(), then translate all objects by the leading pad.

    Args:
        padwidth: pad amount for columns; if a 2-tuple, its first element is used as the object x offset
        padheight: pad amount for rows; if a 2-tuple, its first element is used as the object y offset
        mu: forwarded to the parent class meanpad()

    Returns:
        This scene, for chaining
    """
    self = super().meanpad(padwidth, padheight, mu=mu)
    (dx, dy) = (padwidth, padheight)
    if isinstance(padwidth, tuple) and len(padwidth) == 2:
        dx = padwidth[0]
    if isinstance(padheight, tuple) and len(padheight) == 2:
        dy = padheight[0]
    shifted = []
    for obj in self._objectlist:
        shifted.append(obj.translate(dx, dy))
    self._objectlist = shifted
    self._history('translate', dx=-dx, dy=-dy)  # the opposite translation is what gets recorded
    return self

def mindim(self, dim=None, interp='bilinear')

Resize scene preserving aspect ratio so that minimum dimension of image = dim, update all objects

Expand source code Browse git

def mindim(self, dim=None, interp='bilinear'):
    """Resize scene preserving aspect ratio so that minimum dimension of image = dim, update all objects.

    Returns:
        The current minimum of shape() if dim is None, otherwise the result of the parent class mindim()
    """
    if dim is None:
        return min(self.shape())
    # Original note: the parent implementation will call self.rescale(), which will update boxes
    return super().mindim(dim, interp=interp)

def nms(self, conf, iou, cover=0.8)

Non-maximum suppression of objects() by category based on confidence and spatial IoU and cover thresholds

Expand source code Browse git

def nms(self, conf, iou, cover=0.8):
    """Non-maximum suppression of objects() by category based on confidence and spatial IoU and cover thresholds.

    Returns:
        This scene, with its object list replaced by the output of vipy.object.non_maximum_suppression
    """
    survivors = vipy.object.non_maximum_suppression(self.objects(), conf=conf, iou=iou, cover=cover, bycategory=True)
    return self.objects(survivors)

def num_objects(self)

Expand source code Browse git

def num_objects(self):
    """Return the number of entries in the scene's object list"""
    count = len(self._objectlist)
    return count

def object_tags(self)

Return list of unique object tags in scene

Expand source code Browse git

def object_tags(self):
    """Return list of unique object tags in scene, in order of first appearance"""
    seen = {}  # dict used as an insertion-ordered set
    for obj in self.objects():
        for tag in obj.tags():
            seen.setdefault(tag, None)
    return list(seen)

def objectcrop(self, dilate=1.0)

Crop image using the Scene.boundingbox() with dilation factor. Crop will be zeropadded if outside the image rectangle.

Expand source code Browse git

def objectcrop(self, dilate=1.0):
    """Crop image using the `vipy.image.Scene.boundingbox` with dilation factor.  Crop will be zeropadded if outside the image rectangle.

    Returns:
        The result of padcrop() on the dilated bounding box, or this scene unchanged if boundingbox() is None
    """
    bb = self.boundingbox()
    if bb is None:
        return self
    return self.padcrop(bb.dilate(dilate))

def objectfilter(self, f)

Apply lambda function f to each object and keep if filter is True

Expand source code Browse git

def objectfilter(self, f):
    """Apply lambda function f to each object and keep the object only if f returns exactly True.

    Returns:
        This scene, for chaining
    """
    assert callable(f)
    kept = []
    for obj in self._objectlist:
        if f(obj) is True:  # identity test: truthy values other than True do not keep the object
            kept.append(obj)
    self._objectlist = kept
    return self

def objectmap(self, f)

Apply lambda function f to each object. If f is a list of lambda, apply one to one with the objects

Expand source code Browse git

def objectmap(self, f):
    """Apply lambda function f to each object.  If f is a list of lambda, apply one to one with the objects

    Args:
        f: a callable applied to every object, or a list of callables with exactly one entry per object, applied pairwise in order

    Returns:
        This scene, with every object replaced by the value returned for it

    Raises:
        AssertionError: if f is neither a callable nor a list of callables, if a list f does not have one entry per object, or if any returned value is not a vipy.object.Object
    """
    if isinstance(f, list):
        # A list is never callable(), so the list form must be validated on its own or it can never be used
        assert all([callable(g) for g in f]), "Invalid input - every element of the list must be callable"
        # zip() truncates to the shorter input, which would silently drop objects
        assert len(f) == len(self._objectlist), "Invalid input - list of lambda functions must have one entry per object"
        self._objectlist = [g(obj) for (g, obj) in zip(f, self._objectlist)]
    else:
        assert callable(f), "Invalid input - must be a callable or a list of callables"
        self._objectlist = [f(obj) for obj in self._objectlist]
    assert all([isinstance(a, vipy.object.Object) for a in self.objects()]), "Lambda function must return vipy.object.Detection"
    return self

def objects(self, objectlist=None)

Expand source code Browse git

def objects(self, objectlist=None):
    """Get or set the objects in the scene.

    Args:
        objectlist: if None, act as a getter; otherwise a list of vipy.object.Object (possibly empty) that replaces the current objects

    Returns:
        The current object list when called with no argument, otherwise this scene for chaining
    """
    if objectlist is not None:
        valid = isinstance(objectlist, list) and (len(objectlist) == 0 or all([isinstance(bb, vipy.object.Object) for bb in objectlist]))
        assert valid, "Invalid object list"
        self._objectlist = objectlist
        return self
    return self._objectlist

def objectsquare(self, dilate=1.0)

Crop image using the Scene.boundingbox() with dilation factor, setting to maxsquare prior to crop. Crop will be zeropadded if outside the image rectangle.

Expand source code Browse git

def objectsquare(self, dilate=1.0):
    """Crop image using the `vipy.image.Scene.boundingbox` with dilation factor, setting to maxsquare prior to crop.  Crop will be zeropadded if outside the image rectangle.

    Returns:
        The result of padcrop() on the dilated, max-squared bounding box, or this scene unchanged if boundingbox() is None
    """
    bb = self.boundingbox()
    if bb is None:
        return self
    square = bb.dilate(dilate).maxsquare()
    return self.padcrop(square)

def perceptualhash(self, bits=128, asbinary=False, asbytes=False, objmask=False)

Perceptual differential hash function.

This function sets foreground objects to mean color, convert to greyscale, resize with linear interpolation to small image based on desired bit encoding, compute vertical and horizontal gradient signs.

Args

bits: [int] longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
objmask: [bool] if true, replace the foreground object masks with the mean color prior to computing
asbinary: [bool] If true, return a binary array
asbytes: [bool] if true return a byte array

Returns

A hash string encoding the perceptual hash such that Image.perceptualhash_distance() can be used to compute a hash distance
asbytes: a bytes array
asbinary: a numpy binary array

Notes

Can be used for near duplicate detection of background scenes by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing. Equivalently, Image.perceptualhash_distance().
The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex( bghash() )) which is equivalent to perceptualhash(asbinary=True)

Expand source code Browse git

def perceptualhash(self, bits=128, asbinary=False, asbytes=False, objmask=False):
    """Perceptual differential hash function.

    A clone of the image (optionally with foreground objects set to the mean color) is resized to a small square,
    converted to greyscale, and the signs of the gradients along both image axes are packed into the hash.

    Args:
        bits: [int]  the hash length, which must be 2*k*k for k in 2..16.  Longer hashes have lower TAR (true accept rate, some near dupes are missed), but lower FAR (false accept rate), shorter hashes have higher TAR (fewer near-dupes are missed) but higher FAR (more non-dupes are declared as dupes).
        objmask: [bool] if true, replace the foreground object masks with the mean color prior to computing
        asbinary: [bool] if true, return a binary array
        asbytes: [bool] if true return a byte array, this takes precedence over asbinary

    Returns:
        A hex string encoding the perceptual hash such that `vipy.image.Image.perceptualhash_distance` can be used to compute a hash distance
        asbytes: a bytes array
        asbinary: a flattened numpy boolean array

    .. notes::
        - Can be used for near duplicate detection of background scenes by unpacking the returned hex string to binary and computing hamming distance, or performing hamming based nearest neighbor indexing.  Equivalently, `vipy.image.Image.perceptualhash_distance`.
        - The default packed hex output can be converted to binary as: np.unpackbits(bytearray().fromhex( bghash() )) which is equivalent to perceptualhash(asbinary=True)

    """
    allowablebits = [2*k*k for k in range(2, 17)]
    assert bits in allowablebits, "Bits must be in %s" % str(allowablebits)
    sq = int(np.ceil(np.sqrt(bits/2.0)))

    im = self.clone()
    if objmask:
        im = im.meanmask()

    # one extra row and column is requested, then the last row and column of the gradients are discarded
    grey = im.resize(cols=sq+1, rows=sq+1).greyscale().numpy()
    gradients = np.dstack(np.gradient(grey))
    b = (gradients[0:-1, 0:-1] > 0).flatten()

    if asbytes:
        return bytes(np.packbits(b))
    if asbinary:
        return b
    return bytes(np.packbits(b)).hex()

def pixelate(self, radius=16)

Alias for pixelmask

Expand source code Browse git

def pixelate(self, radius=16):
    """Alias for pixelmask, with radius forwarded as the pixelsize"""
    pixelated = self.pixelmask(pixelsize=radius)
    return pixelated

def pixelize(self, radius=16)

Alias for pixelmask

Expand source code Browse git

def pixelize(self, radius=16):
    """Alias for pixelmask, with radius forwarded as the pixelsize"""
    pixelated = self.pixelmask(pixelsize=radius)
    return pixelated

def pixelmask(self, pixelsize=8)

Replace pixels within all foreground objects with a privacy preserving pixelated foreground with larger pixels (e.g. like privacy glass)

Expand source code Browse git

def pixelmask(self, pixelsize=8):
    """Replace pixels within all foreground objects with a privacy preserving pixelated foreground with larger pixels (e.g. like privacy glass)

    Args:
        pixelsize: a scale factor greater than one; a clone is rescaled by 1/pixelsize and resized back to this image, both with nearest neighbor interpolation

    Returns:
        This scene, with the pixels under the rectangular object mask updated in place
    """
    assert pixelsize > 1, "Pixelsize is a scale factor such that pixels within the foreground are pixelsize times larger than the background"
    img = self.numpy()  # force writeable
    mask = self.rectangular_mask()
    coarse = self.clone().rescale(1.0/pixelsize, interp='nearest')
    blocky = coarse.resize_like(self, interp='nearest').numpy()
    foreground = mask > 0
    img[foreground] = blocky[foreground]  # in-place update
    return self

def rectangular_mask(self, W=None, H=None)

Return a binary array of the same size as the image (or using the provided image width and height (W,H) size to avoid an image load), with ones inside all bounding boxes

Expand source code Browse git

def rectangular_mask(self, W=None, H=None):
    """Return a binary array of the same size as the image (or using the
    provided image width and height (W,H) size to avoid an image load),
    with ones inside all bounding boxes

    Args:
        W: [int] the mask width.  If either W or H is None, both are taken from the rounded image width() and height()
        H: [int] the mask height

    Returns:
        A uint8 numpy array of shape (H, W), equal to one inside every overlapping bounding box and every overlapping point circle, zero elsewhere
    """
    if (W is None or H is None):
        (H, W) = (int(np.round(self.height())),
                  int(np.round(self.width())))
    immask = np.zeros((H, W), dtype=np.uint8)
    for o in self._objectlist:
        if isinstance(o, vipy.geometry.BoundingBox) and o.hasoverlap(immask):
            # Clip against the mask itself, not self.numpy(): the mask carries the requested (H, W),
            # and touching the pixel buffer would force the image load that passing (W, H) is meant to avoid
            bbm = o.clone().imclip(immask).int()
            immask[bbm.ymin():bbm.ymax(), bbm.xmin():bbm.xmax()] = 1
        if isinstance(o, vipy.geometry.Point2d) and o.boundingbox().hasoverlap(immask):
            mask = vipy.calibration.circle(o.x, o.y, o.r, W, H)
            immask[mask>0] = 1
    return immask

def replace(self, newim, broadcast=False)

Set all image values within the bounding box equal to the provided img, triggers load() and imclip()

Expand source code Browse git

def replace(self, newim, broadcast=False):
    """Set all image values within each bounding box equal to the provided image, triggers load() and imclip()

    Args:
        newim: a vipy.image.Image supplying the replacement pixels
        broadcast: [bool] if false, copy the pixels of newim at the same coordinates as each box; if true, resize a clone of newim to the integer (width, height) of each box and paste that

    Returns:
        This scene, with its pixel buffer updated in place
    """
    assert isinstance(newim, vipy.image.Image), "Invalid replacement image - Must be vipy.image.Image"
    img = self.numpy()
    newimg = newim.array()
    for d in self._objectlist:
        # The return value is discarded, so imclip is presumably updating d in place - TODO confirm
        d.imclip(newimg).imclip(img)
        if broadcast:
            patch = newim.clone().resize(int(d.width()), int(d.height())).array()
        else:
            patch = newimg[int(d.ymin()):int(d.ymax()), int(d.xmin()):int(d.xmax())]
        img[int(d.ymin()):int(d.ymax()), int(d.xmin()):int(d.xmax())] = patch
    return self

def rescale(self, scale=1, interp='bilinear')

Rescale image buffer and all bounding boxes - Not idempotent

Expand source code Browse git

def rescale(self, scale=1, interp='bilinear'):
    """Rescale image buffer and all bounding boxes - Not idempotent

    Returns:
        This scene, for chaining
    """
    self = super().rescale(scale, interp=interp)
    rescaled = []
    for obj in self._objectlist:
        rescaled.append(obj.rescale(scale))
    self._objectlist = rescaled
    self._history('rescale', s=1/scale)  # the reciprocal scale is what gets recorded
    return self

def resize(self, cols=None, rows=None, height=None, width=None, interp='bilinear')

Resize image buffer to (height=rows, width=cols) and transform all bounding boxes accordingly. If cols or rows is None, then scale isotropically. cols is a synonym for width, rows is a synonym for height

Expand source code Browse git

def resize(self, cols=None, rows=None, height=None, width=None, interp='bilinear'):
    """Resize image buffer to (height=rows, width=cols) and transform all bounding boxes accordingly.

    If only one of the two dimensions is given, the other is scaled isotropically.
    cols is a synonym for width, rows is a synonym for height; define at most one of each pair.

    Returns:
        The result of the parent class rescale() when both axes share one scale factor, otherwise of the parent class resize()
    """
    assert cols is None or width is None, "Define either width or cols"
    assert rows is None or height is None, "Define either height or rows"
    if height is not None:
        rows = height
    if width is not None:
        cols = width
    assert not (cols is None and rows is None), "Invalid input"

    sx = None if cols is None else (float(cols) / self.width())
    sy = None if rows is None else (float(rows) / self.height())
    if sx is None:
        sx = sy  # only rows given: reuse the vertical scale
    if sy is None:
        sy = sx  # only cols given: reuse the horizontal scale

    # objects are scaled and the reciprocal scales recorded before the pixel buffer is touched
    scaled = []
    for obj in self._objectlist:
        scaled.append(obj.scale_x(sx).scale_y(sy))
    self._objectlist = scaled
    self._history('scale_x', s=1/sx)._history('scale_y', s=1/sy)

    if sx == sy:
        return super().rescale(sx, interp=interp)  # FIXME: if we call resize here, inheritance is screweed up
    return super().resize(cols, rows, interp=interp)

def rot90ccw(self)

Rotate the scene 90 degrees counterclockwise, and update objects

Expand source code Browse git

def rot90ccw(self):
    """Rotate the scene 90 degrees counterclockwise, and update objects

    Returns:
        This scene, for chaining
    """
    (H, W) = self.shape()  # shape before rotation
    rotated = np.rot90(self.numpy(), 1)
    self.array(rotated)
    turned = []
    for obj in self._objectlist:
        turned.append(obj.rot90ccw(H, W))
    self._objectlist = turned
    self._history('rot90cw', H=W, W=H)  # the opposite rotation is recorded, with the dimensions swapped
    return self

def rot90cw(self)

Rotate the scene 90 degrees clockwise, and update objects

Expand source code Browse git

def rot90cw(self):
    """Rotate the scene 90 degrees clockwise, and update objects

    Returns:
        This scene, for chaining
    """
    (H, W) = self.shape()  # shape before rotation
    rotated = np.rot90(self.numpy(), 3)
    self.array(rotated)
    turned = []
    for obj in self._objectlist:
        turned.append(obj.rot90cw(H, W))
    self._objectlist = turned
    self._history('rot90ccw', H=W, W=H)  # the opposite rotation is recorded, with the dimensions swapped
    return self

def savefig(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person': 'green', 'vehicle': 'blue', 'object': 'red'}, captionoffset=(3, -18), dpi=200, textfacecolor='white', shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0, 0), theme='dark')

Save `vipy.image.Scene.show` output to given file or return buffer without popping up a window

Expand source code Browse git

def savefig(self, outfile=None, categories=None, figure=1, nocaption=False, fontsize=10, boxalpha=0.15, d_category2color={'person':'green', 'vehicle':'blue', 'object':'red'}, captionoffset=(3,-18), dpi=200, textfacecolor='white', shortlabel=None, nocaption_withstring=[], timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
    """Save `vipy.image.Scene.show` output to given file or return buffer without popping up a window

    The scene is rendered with show(nowindow=True) into the requested figure (figure 1 if figure is None).
    The display arguments are forwarded unchanged to `vipy.image.Scene.show`; textfacecolor is accepted but not read by this method.

    Args:
        outfile: [str] the path to save the figure to.  If None, the rendered figure is returned as an image instead
        figure: [int] the figure number.  If None and outfile is None, the figure is closed after rendering
        dpi: [int] forwarded to vipy.show.savefig when writing outfile

    Returns:
        outfile if provided, otherwise a vipy.image.Image in RGB colorspace holding the rendered figure
    """
    fignum = 1 if figure is None else figure
    self.show(categories=categories, figure=fignum, nocaption=nocaption, fontsize=fontsize, boxalpha=boxalpha,
              d_category2color=d_category2color, captionoffset=captionoffset, nowindow=True,
              shortlabel=shortlabel, nocaption_withstring=nocaption_withstring, timestamp=timestamp,
              mutator=mutator, timestampoffset=timestampoffset, theme=theme)

    if outfile is not None:
        # NOTE: the caller supplied figure (which may be None) is forwarded here, not fignum
        vipy.show.savefig(os.path.abspath(os.path.expanduser(outfile)), figure, dpi=dpi, bbox_inches='tight', pad_inches=0)
        return outfile

    buf = io.BytesIO()
    (W,H) = plt.figure(num=fignum).canvas.get_width_height()  # fast(ish)
    plt.figure(num=fignum).canvas.print_raw(buf)  # fast(ish), FIXME: there is a bug here with captions showing behind bboxes on macos
    img = np.frombuffer(buf.getbuffer(), dtype=np.uint8).reshape((H, W, 4))  # raw canvas output is four channels per pixel
    if figure is None:
        vipy.show.close(plt.gcf().number)   # memory cleanup (useful for video annotation on last frame)
    return vipy.image.Image(array=img, colorspace='rgba').rgb()

def show(self, categories=None, figure=1, nocaption=False, nocaption_withstring=[], fontsize=10, boxalpha=0.15, d_category2color={'Person': 'green', 'Vehicle': 'blue', 'Object': 'red'}, captionoffset=(3, -18), nowindow=False, shortlabel=None, timestamp=None, mutator=None, timestampoffset=(0, 0), theme='dark')

Show scene detection

Args

categories: [list] List of category names in the scene to show
fontsize: [int] or [str]: Size of the font, fontsize=int for points, fontsize='NN:scaled' to scale the font relative to the image size
figure: [int|str] Figure number or title, show the image in the provided figure=int numbered window
nocaption: [bool] Show or do not show the text caption in the upper left of the box
nocaption_withstring: [list]: Do not show captions for those object categories containing any of the strings in the provided list
boxalpha (float, [0,1]): Set the text box background to be semi-transparent with an alpha
d_category2color (dict): Define a dictionary of required mapping of specific category() to box colors. Non-specified categories are assigned a random named color from vipy.show.colorlist()
captionoffset (int, int): The relative position of the caption to the upper right corner of the box.
nowindow (bool): Display or not display the image, used by vipy.image.Scene.annotation
shortlabel (dict): An optional dictionary mapping category names to short names easier to display
mutator (lambda): A lambda function with signature lambda im: f(im) which will modify this image prior to show. Useful for changing labels on the fly
timestampoffset (tuple): (x,y) coordinate offsets to shift the upper left corner timestamp
theme [str]: If 'dark' use dark mode, if 'light' use light mode to visualize captions with high contrast dark or light foregrounds

Expand source code Browse git

def show(self, categories=None, figure=1, nocaption=False, nocaption_withstring=[], fontsize=10, boxalpha=0.15, d_category2color={'Person':'green', 'Vehicle':'blue', 'Object':'red'}, captionoffset=(3,-18), nowindow=False, shortlabel=None, timestamp=None, mutator=None, timestampoffset=(0,0), theme='dark'):
    """Show scene detection 

    A clone of the scene (optionally passed through mutator) is converted to RGB if needed, the objects to draw are selected and colored, and the result is rendered with vipy.show.imobjects.

    Args:
       - categories: [list]  List of category names in the scene to show
       - fontsize: [int] or [str]: Size of the font, fontsize=int for points, fontsize='NN:scaled' to scale the font relative to the image size
       - figure: [int|str] Figure number or title, show the image in the provided figure=int numbered window
       - nocaption: [bool]  Show or do not show the text caption in the upper left of the box 
       - nocaption_withstring: [list]:  Do not show captions for those object categories containing any of the strings in the provided list
       - boxalpha (float, [0,1]):  Set the text box background to be semi-transparent with an alpha
       - d_category2color (dict):  Define a dictionary of required mapping of specific category() to box colors.  Non-specified categories are assigned a color from vipy.show.colorlist() chosen by a hash of the category name, so the choice is repeatable
       - captionoffset (int, int): The relative position of the caption to the upper right corner of the box.
       - nowindow (bool):  Display or not display the image, used by `vipy.image.Scene.annotation`
       - shortlabel (dict):  An optional dictionary mapping category names to short names easier to display 
       - timestamp: Forwarded unchanged to vipy.show.imobjects
       - mutator (lambda):  A lambda function with signature lambda im: f(im) which will modify this image prior to show.  Useful for changing labels on the fly
       - timestampoffset (tuple): (x,y) coordinate offsets to shift the upper left corner timestamp
       - theme [str]: If 'dark' use dark mode, if 'light' use light mode to visualize captions with high contrast dark or light foregrounds 

    Returns:
        This scene (not the displayed clone), for chaining
    """
    colors = vipy.show.colorlist(theme)
    all_colors = vipy.show.colorlist()        
    # Caption and timestamp colors depend only on whether the theme is 'dark'
    textfacecolor = 'black' if theme=='dark' else 'white'
    timestampcolor = 'white' if theme=='dark'  else 'black'
    timestampfacecolor = 'black' if theme=='dark' else 'white'        
    textfacealpha = 0.8 if theme=='dark' else 0.85
    
    # Everything below works on a clone, so the mutator never sees this scene itself
    im = self.clone() if not mutator else mutator(self.clone())
    imdisplay = im.rgb() if im.colorspace() != 'rgb' else im.load()  # convert to RGB for show() if necessary
    
    valid_objects = [obj.clone() for obj in imdisplay.objects() if categories is None or obj.category() in tolist(categories)]  # Objects with valid category
    # NOTE(review): clipping uses self.numpy() rather than the (possibly mutated) display clone - confirm this is intended when mutator changes the image size
    valid_objects = [obj.imclip(self.numpy()) for obj in valid_objects if obj.hasoverlap(self.numpy())]  # Objects within image rectangle
    valid_objects = [obj.new_category(shortlabel[obj.category()]) for obj in valid_objects] if shortlabel else valid_objects  # Display name as shortlabel?
    # Colors are indexed by the sha1 of the category name modulo the palette length: detections use the theme palette, keypoints the full palette
    d_det_category_to_color = {d.category():colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(colors)] for d in valid_objects if isinstance(d, vipy.object.Detection)}
    d_kp_category_to_color = {d.category():all_colors[int(hashlib.sha1(str(d.category()).encode('utf-8')).hexdigest(), 16) % len(all_colors)] for d in valid_objects if isinstance(d, vipy.object.Keypoint2d)}        
    # presumably later dictionaries take precedence in mergedict, so d_category2color overrides the hashed colors - TODO confirm
    d_category_to_color = mergedict(d_kp_category_to_color, d_det_category_to_color, d_category2color)
    
    # Colors are looked up before any category is removed below, so uncaptioned objects keep their color
    object_color = [d_category_to_color[d.category()] for d in valid_objects]                
    valid_objects  = [d if not any([c in d.category() for c in tolist(nocaption_withstring)]) else d.nocategory() for d in valid_objects]  # Objects requested to show without caption

    # A string fontsize 'NN:scaled' is scaled by (minimum image dimension / 640)
    fontsize_scaled = float(fontsize.split(':')[0])*(min(imdisplay.shape())/640.0) if isstring(fontsize) else fontsize
    vipy.show.imobjects(imdisplay._array, valid_objects, bordercolor=object_color, textcolor=object_color, fignum=figure, do_caption=(nocaption==False), facealpha=boxalpha, fontsize=fontsize_scaled,
                        captionoffset=captionoffset, nowindow=nowindow, textfacecolor=textfacecolor, textfacealpha=textfacealpha, timestamp=timestamp,
                        timestampcolor=timestampcolor, timestampfacecolor=timestampfacecolor, timestampoffset=timestampoffset)
    return self

def split(self)

Split a scene with K objects into a list of K Scene objects, each with one object in the scene.

Note: The pixel buffer is shared between each split. Use [im.clone() for im in self.split()] for an explicit copy.

Expand source code Browse git

def split(self):
    """Split a scene with K objects into a list of K `vipy.image.Scene` objects, each with one object in the scene.

    The list holds the elements yielded by iterating over this scene, in order.

    .. note:: The pixel buffer is shared between each split.  Use [im.clone() for im in self.split()] for an explicit copy.
    """
    return [im for im in self]

def split_and_recenter(self)

Split a scene with K objects into a list of K Scene objects, each with one object in the scene, with the scene centered on the object with zeropadding

Note: The pixel buffer is shared between each split. Use [im.clone() for im in self.split()] for an explicit copy.

Expand source code Browse git

def split_and_recenter(self):
    """Split a scene with K objects into a list of K `vipy.image.Scene` objects, each with one object in the scene, with the scene centered on the object with zeropadding

    Each element of split() is cloned, and the clone is recentered on the centroid of that element's bounding box.

    Returns:
        A list of the recentered clones, in split() order
    """
    recentered = []
    for im in self.split():
        duplicate = im.clone()
        recentered.append(duplicate.recenter(im.boundingbox().centroid()))
    return recentered

def tags(self, tags=None)

Return the image level and object level tags of the scene

Expand source code Browse git

def tags(self, tags=None):
    """Return the image level and object level tags of the scene.

    Args:
        tags: if not None, forwarded to the parent class tags() and its result is returned

    Returns:
        With no argument, the parent class tags() followed by object_tags()
    """
    if tags is None:
        return super().tags() + self.object_tags()
    return super().tags(tags)

def uncrop(self, bb, shape)

Uncrop a previous crop(bb) called with the supplied bb=BoundingBox(), and zeropad to shape=(H,W)

Expand source code Browse git

def uncrop(self, bb, shape):
    """Uncrop a previous crop(bb) called with the supplied bb=BoundingBox(), and zeropad to shape=(H,W)

    The pixel buffer is handled by the parent class uncrop(); every object is then translated by the (xmin, ymin) corner of bb.

    Returns:
        This scene, as returned by objectmap()
    """
    super().uncrop(bb, shape)

    def shift_by_crop_corner(o):
        return o.translate(bb.xmin(), bb.ymin())

    return self.objectmap(shift_by_crop_corner)

def union(self, other, miniou=None)

Combine the objects of the scene with other and self with no duplicate checking unless miniou is not None

Expand source code Browse git

def union(self, other, miniou=None):
    """Combine the objects of the scene with other and self with no duplicate checking unless miniou is not None

    Args:
        other: an Image.  Only a Scene contributes objects, any other Image leaves this scene unchanged
        miniou: if None, all objects of other are appended.  Otherwise, an object of other is appended only if greedy assignment (by category) at this minimum IoU leaves it unassigned to every object already in this scene

    Returns:
        This scene, with the objects of other appended after its own
    """
    assert isinstance(other, Image), "Invalid input"
    if isinstance(other, Scene):
        incoming = other.objects()
        if miniou is not None:
            # greedy_assignment yields one entry per source object, None where the object has no match in the destination list
            assignment = greedy_assignment(incoming, self.objects(), miniou, bycategory=True)
            incoming = [o for (o, d) in zip(incoming, assignment) if d is None]
        self.objects(self.objects()+incoming)
    return self

def zeropad(self, padwidth, padheight)

Zero pad image with padwidth cols before and after and padheight rows before and after, then update bounding box offsets

Expand source code Browse git

def zeropad(self, padwidth, padheight):
    """Zero pad the image via the parent class zeropad(), then translate all objects by the leading pad.

    Args:
        padwidth: pad amount for columns; if a 2-tuple, its first element is used as the object x offset
        padheight: pad amount for rows; if a 2-tuple, its first element is used as the object y offset

    Returns:
        This scene, for chaining
    """
    self = super().zeropad(padwidth, padheight)
    (dx, dy) = (padwidth, padheight)
    if isinstance(padwidth, tuple) and len(padwidth) == 2:
        dx = padwidth[0]
    if isinstance(padheight, tuple) and len(padheight) == 2:
        dy = padheight[0]
    shifted = []
    for obj in self._objectlist:
        shifted.append(obj.translate(dx, dy))
    self._objectlist = shifted
    self._history('translate', dx=-dx, dy=-dy)  # the opposite translation is what gets recorded
    return self

Inherited members

TaggedImage:
- PIL_loader
- abspath
- add_soft_tag
- add_soft_tags
- additive_noise
- affine_transform
- alpha
- alphapad
- append_attribute
- area
- array
- ascii
- aspectratio
- base64
- bgr
- bgra
- bias
- blend
- blue
- blur
- bone
- border_mask
- bytes_array_loader
- canload
- cast
- centercrop
- centerpixel
- centroid
- channel
- channelmean
- channels
- clear_filename
- clone
- close
- closeall
- color_transform
- colorspace
- colorspace_like
- dict
- download
- downloadif
- exif
- face_blur
- face_detection
- face_pixelize
- filename
- filesize
- float
- from_json
- from_torch
- from_uri
- fromarray
- fromtorch
- gain
- get_attribute
- getattribute
- gray
- grayscale
- green
- grey
- greyscale
- has_filename
- has_soft_tags
- has_url
- hasfilename
- hasurl
- height
- hot
- hsv
- html
- imagebox
- intensity
- is_downloaded
- is_loaded
- iscolor
- isdownloaded
- isgrey
- isloaded
- isluminance
- istransparent
- jet
- load
- loaded
- loader
- lum
- luminance
- map
- mat2gray
- maxmatte
- maxsquare
- mean
- meanchannel
- metadata
- mindimn
- minsquare
- normalize
- numpy
- pad
- padcrop
- perceptualhash_distance
- person_detection
- pil
- pkl
- pklif
- print
- rainbow
- recenter
- red
- reload
- relpath
- resize_like
- restore
- rgb
- rgba
- rotate
- rotate_by_exif
- sanitize
- saturate
- save
- saveas
- saveastmp
- savetmp
- set_attribute
- setattributes
- shape
- soft_tags
- splat
- store
- sum_to_one
- tile
- tocache
- tonumpy
- torch
- try_download
- try_load
- uncache
- unload
- unstore
- untile
- uri
- url
- viewport
- width
- zeropadlike
- zeros

class TaggedImage (filename=None, url=None, attributes=None, array=None, colorspace=None, tags=None, category=None, confidence=None, caption=None)

vipy.image.TaggedImage class

This class provides a representation of a vipy.image.Image with one or more tags.

Valid constructors include all provided by vipy.image.Image with additional labels that provide ground truth for the content of the image.

im = vipy.image.TaggedImage(filename='/path/to/dog.jpg', tags={'dog','canine'})

Expand source code Browse git

class TaggedImage(Labeled):
    """vipy.image.TaggedImage class

    This class provides a representation of a vipy.image.Image with one or more tags.

    Valid constructors include all provided by vipy.image.Image with additional labels that provide ground truth for the content of the image. 

    ```python
    im = vipy.image.TaggedImage(filename='/path/to/dog.jpg', tags={'dog','canine'})
    ```

    Tags, per-tag confidences and captions are kept in `self.attributes` under the keys 'tags', 'confidences' and 'captions'.
    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')        

    def __init__(self, filename=None, url=None, attributes=None, array=None, colorspace=None, tags=None, category=None, confidence=None, caption=None):
        """Create a tagged image.

        Args:
            filename, url, attributes, array, colorspace: forwarded unchanged to the parent constructor
            tags: one or more tags, converted with `tolist`
            category: optional primary tag, stored ahead of `tags`
            confidence: optional confidence recorded for the primary (first) tag
            caption: one or more captions, stored via `captions()`
        """
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)

        tags = ([category] if category is not None else []) + (tolist(tags) if tags is not None else [])
        if len(tags) > 0:
            self.set_attribute('tags', tags)
        if confidence is not None and 'tags' in self.attributes and len(self.attributes['tags']) > 0:
            # This argument used to be silently dropped; record it for the first tag, which is
            # the tag that confidence() looks up when called without arguments.
            if not self.hasattribute('confidences'):
                self.set_attribute('confidences', {})
            self.attributes['confidences'][self.attributes['tags'][0]] = confidence
        if caption is not None:
            self.captions(caption)

    def __repr__(self):
        """Parent repr renamed to TaggedImage, with category/caption/confidence/tags fields appended when present"""
        tags = self.tags()
        fields = []
        if len(tags) == 1:
            fields.append('category=%s' % self.category())
        if self.caption() is not None:
            fields.append('caption=%s' % truncate_string(self.caption(), 40))
        if len(tags) == 1 and self.confidence() is not None:
            fields.append('confidence=%1.3f' % self.confidence())
        if len(tags) > 1:
            fields.append('tags=%s' % truncate_string(str(tags), 40))
        base = super().__repr__().replace('vipy.image.Image', 'vipy.image.TaggedImage')
        # With nothing to append, return the parent repr as is rather than emitting a dangling ", >"
        return base.replace('>', ', %s>' % ', '.join(fields)) if len(fields) > 0 else base

    @classmethod
    def from_json(cls, s):
        """Construct from a JSON string or an already decoded dict with optional keys 'filename', 'url', 'attributes', 'colorspace', 'array'"""
        d = json.loads(s) if not isinstance(s, dict) else s
        return cls(filename=d['filename'] if 'filename' in d else None,
                   url=d['url'] if 'url' in d else None,
                   category=None,  # will be in attribute
                   tags=None,      # will be in attributes
                   attributes=d['attributes'] if 'attributes' in d else None,
                   colorspace=d['colorspace'] if 'colorspace' in d else None,
                   array=np.array(d['array'], dtype=np.uint8) if 'array' in d and d['array'] is not None else None)

    def category(self):
        """Return the first tag, or None if there are no tags (including an empty tag list after clear_tags())"""
        if 'tags' in self.attributes and len(self.attributes['tags']) > 0:
            return self.attributes['tags'][0]
        return None

    def new_category(self, c):
        """Replace all tags with the single tag `c`, delete the 'confidences' attribute and return self"""
        self.attributes['tags'] = [c]
        self.del_attribute('confidences')
        return self

    def confidence(self, tag=None, default=None):
        """Return the confidence stored for `tag` (default: category()), or `default` if none is stored"""
        t = tag if tag is not None else self.category()
        return self.get_attribute('confidences')[t] if self.hasattribute('confidences') and t in self.attributes['confidences'] else default

    def has_tag(self, t):
        """Return True if `t` is among tags()"""
        return t in self.tags()

    def tags(self, tags=None):
        """With an argument, replace the tags and return the result of set_attribute(); without, return the stored tags or []"""
        if tags is not None:
            return self.set_attribute('tags', tolist(tags))
        return self.attributes['tags'] if 'tags' in self.attributes else []

    def add_tag(self, tag, confidence=None):
        """Append `tag`, record `confidence` for it when given, and return self"""
        self.append_attribute('tags', tag)
        if confidence is not None:
            if not self.hasattribute('confidences'):
                self.set_attribute('confidences', {})
            self.attributes['confidences'][tag] = confidence
        return self

    def add_caption(self, caption):
        """Append `caption` to the 'captions' attribute and return self"""
        self.append_attribute('captions', caption)
        return self

    def caption(self):
        """Return the first caption, or None if there are no captions (including an empty captions list)"""
        if not self.hasattribute('captions'):
            return None
        captions = self.get_attribute('captions')
        return captions[0] if len(captions) > 0 else None

    def captions(self, captions=None):
        """With an argument, replace the captions and return the result of set_attribute(); without, return the stored captions or []"""
        if captions is not None:
            return self.set_attribute('captions', tolist(captions))
        return self.get_attribute('captions') if self.hasattribute('captions') else []

    def add_tags(self, tags, confidences=[]):
        """Add each tag with the confidence at the same position; the shorter sequence is padded with None by zip_longest"""
        for (t, c) in zip_longest(tags, confidences):
            self.add_tag(t, c)
        return self

    def clear_tags(self):
        """Set the tags to an empty list, drop all confidences and return self"""
        self.set_attribute('tags', [])
        if 'confidences' in self.attributes:
            del self.attributes['confidences']
        return self

    def add_soft_tags(self, soft_tags):
        """Soft tags are a list of (tag, confidence) tuples"""
        for (t, c) in soft_tags:
            self.add_tag(t, c)
        return self

    def add_soft_tag(self, soft_tag):
        """A soft tag is a tuple of (tag, confidence)"""
        return self.add_tag(*soft_tag)

    def soft_tags(self):
        """Soft tags are a list of (tag, confidence) tuples.  Will return only those tags with associated confidences.  Will return empty tuple if there are tags but no confidences"""
        return tuple((t, self.attributes['confidences'].get(t)) for t in self.tags() if 'confidences' in self.attributes and self.attributes['confidences'].get(t) is not None)

    def has_soft_tags(self):
        """Return True if a confidence exists for any tag"""
        return len(self.soft_tags()) > 0

Ancestors

Labeled
Image

Subclasses

Scene

Instance variables

var attributes

Expand source code Browse git

class TaggedImage(Labeled):
    """vipy.image.TaggedImage class

    This class provides a representation of a vipy.image.Image with one or more tags.

    Valid constructors include all provided by vipy.image.Image with additional labels that provide ground truth for the content of the image. 

    ```python
    im = vipy.image.TaggedImage(filename='/path/to/dog.jpg', tags={'dog','canine'})
    ```

    Tags, per-tag confidences and captions are kept in `self.attributes` under the keys 'tags', 'confidences' and 'captions'.
    """
    __slots__ = ('_filename', '_url', '_loader', '_array', '_colorspace', 'attributes')        

    def __init__(self, filename=None, url=None, attributes=None, array=None, colorspace=None, tags=None, category=None, confidence=None, caption=None):
        """Create a tagged image.

        Args:
            filename, url, attributes, array, colorspace: forwarded unchanged to the parent constructor
            tags: one or more tags, converted with `tolist`
            category: optional primary tag, stored ahead of `tags`
            confidence: optional confidence recorded for the primary (first) tag
            caption: one or more captions, stored via `captions()`
        """
        super().__init__(filename=filename,
                         url=url,
                         attributes=attributes,
                         array=array,
                         colorspace=colorspace)

        tags = ([category] if category is not None else []) + (tolist(tags) if tags is not None else [])
        if len(tags) > 0:
            self.set_attribute('tags', tags)
        if confidence is not None and 'tags' in self.attributes and len(self.attributes['tags']) > 0:
            # This argument used to be silently dropped; record it for the first tag, which is
            # the tag that confidence() looks up when called without arguments.
            if not self.hasattribute('confidences'):
                self.set_attribute('confidences', {})
            self.attributes['confidences'][self.attributes['tags'][0]] = confidence
        if caption is not None:
            self.captions(caption)

    def __repr__(self):
        """Parent repr renamed to TaggedImage, with category/caption/confidence/tags fields appended when present"""
        tags = self.tags()
        fields = []
        if len(tags) == 1:
            fields.append('category=%s' % self.category())
        if self.caption() is not None:
            fields.append('caption=%s' % truncate_string(self.caption(), 40))
        if len(tags) == 1 and self.confidence() is not None:
            fields.append('confidence=%1.3f' % self.confidence())
        if len(tags) > 1:
            fields.append('tags=%s' % truncate_string(str(tags), 40))
        base = super().__repr__().replace('vipy.image.Image', 'vipy.image.TaggedImage')
        # With nothing to append, return the parent repr as is rather than emitting a dangling ", >"
        return base.replace('>', ', %s>' % ', '.join(fields)) if len(fields) > 0 else base

    @classmethod
    def from_json(cls, s):
        """Construct from a JSON string or an already decoded dict with optional keys 'filename', 'url', 'attributes', 'colorspace', 'array'"""
        d = json.loads(s) if not isinstance(s, dict) else s
        return cls(filename=d['filename'] if 'filename' in d else None,
                   url=d['url'] if 'url' in d else None,
                   category=None,  # will be in attribute
                   tags=None,      # will be in attributes
                   attributes=d['attributes'] if 'attributes' in d else None,
                   colorspace=d['colorspace'] if 'colorspace' in d else None,
                   array=np.array(d['array'], dtype=np.uint8) if 'array' in d and d['array'] is not None else None)

    def category(self):
        """Return the first tag, or None if there are no tags (including an empty tag list after clear_tags())"""
        if 'tags' in self.attributes and len(self.attributes['tags']) > 0:
            return self.attributes['tags'][0]
        return None

    def new_category(self, c):
        """Replace all tags with the single tag `c`, delete the 'confidences' attribute and return self"""
        self.attributes['tags'] = [c]
        self.del_attribute('confidences')
        return self

    def confidence(self, tag=None, default=None):
        """Return the confidence stored for `tag` (default: category()), or `default` if none is stored"""
        t = tag if tag is not None else self.category()
        return self.get_attribute('confidences')[t] if self.hasattribute('confidences') and t in self.attributes['confidences'] else default

    def has_tag(self, t):
        """Return True if `t` is among tags()"""
        return t in self.tags()

    def tags(self, tags=None):
        """With an argument, replace the tags and return the result of set_attribute(); without, return the stored tags or []"""
        if tags is not None:
            return self.set_attribute('tags', tolist(tags))
        return self.attributes['tags'] if 'tags' in self.attributes else []

    def add_tag(self, tag, confidence=None):
        """Append `tag`, record `confidence` for it when given, and return self"""
        self.append_attribute('tags', tag)
        if confidence is not None:
            if not self.hasattribute('confidences'):
                self.set_attribute('confidences', {})
            self.attributes['confidences'][tag] = confidence
        return self

    def add_caption(self, caption):
        """Append `caption` to the 'captions' attribute and return self"""
        self.append_attribute('captions', caption)
        return self

    def caption(self):
        """Return the first caption, or None if there are no captions (including an empty captions list)"""
        if not self.hasattribute('captions'):
            return None
        captions = self.get_attribute('captions')
        return captions[0] if len(captions) > 0 else None

    def captions(self, captions=None):
        """With an argument, replace the captions and return the result of set_attribute(); without, return the stored captions or []"""
        if captions is not None:
            return self.set_attribute('captions', tolist(captions))
        return self.get_attribute('captions') if self.hasattribute('captions') else []

    def add_tags(self, tags, confidences=[]):
        """Add each tag with the confidence at the same position; the shorter sequence is padded with None by zip_longest"""
        for (t, c) in zip_longest(tags, confidences):
            self.add_tag(t, c)
        return self

    def clear_tags(self):
        """Set the tags to an empty list, drop all confidences and return self"""
        self.set_attribute('tags', [])
        if 'confidences' in self.attributes:
            del self.attributes['confidences']
        return self

    def add_soft_tags(self, soft_tags):
        """Soft tags are a list of (tag, confidence) tuples"""
        for (t, c) in soft_tags:
            self.add_tag(t, c)
        return self

    def add_soft_tag(self, soft_tag):
        """A soft tag is a tuple of (tag, confidence)"""
        return self.add_tag(*soft_tag)

    def soft_tags(self):
        """Soft tags are a list of (tag, confidence) tuples.  Will return only those tags with associated confidences.  Will return empty tuple if there are tags but no confidences"""
        return tuple((t, self.attributes['confidences'].get(t)) for t in self.tags() if 'confidences' in self.attributes and self.attributes['confidences'].get(t) is not None)

    def has_soft_tags(self):
        """Return True if a confidence exists for any tag"""
        return len(self.soft_tags()) > 0

Methods

def add_caption(self, caption)

Expand source code Browse git

def add_caption(self, caption):
    """Append `caption` to the 'captions' attribute via append_attribute() and return self for chaining"""
    self.append_attribute('captions', caption)
    return self

def add_soft_tag(self, soft_tag)

A soft tag is a tuple of (tag, confidence)

Expand source code Browse git

def add_soft_tag(self, soft_tag):
    """A soft tag is a tuple of (tag, confidence).

    The tuple is unpacked into the arguments of add_tag(), whose return value is returned.
    """
    return self.add_tag(*soft_tag)

def add_soft_tags(self, soft_tags)

Soft tags are a list of (tag, confidence) tuples

Expand source code Browse git

def add_soft_tags(self, soft_tags):
    """Add every (tag, confidence) tuple in `soft_tags` with add_tag() and return self"""
    for pair in soft_tags:
        tag, score = pair
        self.add_tag(tag, score)
    return self

def add_tag(self, tag, confidence=None)

Expand source code Browse git

def add_tag(self, tag, confidence=None):
    """Append `tag` to the 'tags' attribute and, when `confidence` is given, store it under 'confidences'[tag]; returns self"""
    self.append_attribute('tags', tag)
    if confidence is None:
        return self
    # Create the confidence table on first use
    if not self.hasattribute('confidences'):
        self.set_attribute('confidences', {})
    self.attributes['confidences'][tag] = confidence
    return self

def add_tags(self, tags, confidences=[])

Expand source code Browse git

def add_tags(self, tags, confidences=[]):
    """Add each tag with the confidence at the same position and return self.

    The two sequences are paired with zip_longest, so the shorter one is padded with None.
    """
    for pair in zip_longest(tags, confidences):
        self.add_tag(*pair)
    return self

def caption(self)

Expand source code Browse git

def caption(self):
    """Return the first caption, or None when there is no 'captions' attribute or it is empty.

    An empty captions list (e.g. after captions([])) previously raised IndexError here.
    """
    if not self.hasattribute('captions'):
        return None
    captions = self.get_attribute('captions')
    return captions[0] if len(captions) > 0 else None

def captions(self, captions=None)

Expand source code Browse git

def captions(self, captions=None):
    """Get or set the captions.

    Without an argument, return the 'captions' attribute, or [] when it is absent.
    With an argument, store tolist(captions) and return the result of set_attribute().
    """
    if captions is None:
        if self.hasattribute('captions'):
            return self.get_attribute('captions')
        return []
    return self.set_attribute('captions', tolist(captions))

def category(self)

Expand source code Browse git

def category(self):
    """Return the first tag, or None when there is no 'tags' attribute or it is empty.

    clear_tags() leaves 'tags' as an empty list, which previously raised IndexError here.
    """
    if 'tags' in self.attributes and len(self.attributes['tags']) > 0:
        return self.attributes['tags'][0]
    return None

def clear_tags(self)

Expand source code Browse git

def clear_tags(self):
    """Reset the 'tags' attribute to an empty list, remove 'confidences' if present, and return self"""
    self.set_attribute('tags', [])
    self.attributes.pop('confidences', None)
    return self

def confidence(self, tag=None, default=None)

Expand source code Browse git

def confidence(self, tag=None, default=None):
    """Return the confidence stored for `tag`, or `default` when none is stored.

    When `tag` is None the lookup uses category().
    """
    key = self.category() if tag is None else tag
    if self.hasattribute('confidences') and key in self.attributes['confidences']:
        return self.get_attribute('confidences')[key]
    return default

def has_soft_tags(self)

Return True if a confidence exists for any tag

Expand source code Browse git

def has_soft_tags(self):
    """Return True if soft_tags() yields at least one (tag, confidence) pair"""
    count = len(self.soft_tags())
    return count > 0

def has_tag(self, t)

Expand source code Browse git

def has_tag(self, t):
    """Return True if `t` is contained in the value returned by tags()"""
    return t in self.tags()

def new_category(self, c)

Expand source code Browse git

def new_category(self, c):
    """Replace all tags with the single tag `c`, delete the 'confidences' attribute, and return self"""
    self.attributes.update({'tags': [c]})
    self.del_attribute('confidences')
    return self

def soft_tags(self)

Soft tags are a list of (tag, confidence) tuples. Will return only those tags with associated confidences. Will return empty tuple if there are tags but no confidences

Expand source code Browse git

def soft_tags(self):
    """Return a tuple of (tag, confidence) pairs for the tags that have a non-None confidence.

    The result is an empty tuple when there are tags but no confidences.
    """
    pairs = []
    for tag in self.tags():
        if 'confidences' not in self.attributes:
            continue
        score = self.attributes['confidences'].get(tag)
        if score is not None:
            pairs.append((tag, score))
    return tuple(pairs)

def tags(self, tags=None)

Expand source code Browse git

def tags(self, tags=None):
    """Get or set the tags.

    Without an argument, return the 'tags' attribute, or [] when it is absent.
    With an argument, store tolist(tags) and return the result of set_attribute().
    """
    if tags is None:
        if 'tags' in self.attributes:
            return self.attributes['tags']
        return []
    return self.set_attribute('tags', tolist(tags))

Inherited members

Labeled:
- PIL_loader
- abspath
- additive_noise
- affine_transform
- alpha
- alphapad
- annotate
- append_attribute
- area
- array
- ascii
- aspectratio
- base64
- bgr
- bgra
- bias
- blend
- blue
- blur
- bone
- border_mask
- bytes_array_loader
- canload
- cast
- centercrop
- centerpixel
- centersquare
- centroid
- channel
- channelmean
- channels
- clear_filename
- clone
- close
- closeall
- color_transform
- colorspace
- colorspace_like
- cornercrop
- dict
- download
- downloadif
- exif
- face_blur
- face_detection
- face_pixelize
- filename
- filesize
- fliplr
- flipud
- float
- flush
- from_json
- from_torch
- from_uri
- fromarray
- fromtorch
- gain
- get_attribute
- getattribute
- gray
- grayscale
- green
- grey
- greyscale
- has_filename
- has_url
- hasfilename
- hasurl
- height
- hot
- hsv
- html
- imagebox
- intensity
- is_downloaded
- is_loaded
- iscolor
- isdownloaded
- isgrey
- isloaded
- isluminance
- istransparent
- jet
- load
- loaded
- loader
- lum
- luminance
- map
- mat2gray
- maxdim
- maxmatte
- maxsquare
- mean
- meanchannel
- meanpad
- metadata
- mindim
- mindimn
- minsquare
- normalize
- numpy
- pad
- padcrop
- perceptualhash
- perceptualhash_distance
- person_detection
- pil
- pkl
- pklif
- print
- rainbow
- recenter
- red
- reload
- relpath
- rescale
- resize
- resize_like
- restore
- rgb
- rgba
- rot90ccw
- rot90cw
- rotate
- rotate_by_exif
- sanitize
- saturate
- save
- saveas
- saveastmp
- savefig
- savetmp
- set_attribute
- setattributes
- shape
- show
- splat
- store
- sum_to_one
- tile
- tocache
- tonumpy
- torch
- try_download
- try_load
- uncache
- uncrop
- union
- unload
- unstore
- untile
- uri
- url
- viewport
- width
- zeropad
- zeropadlike
- zeros

class Transform

Transforms are static methods that implement common transformation patterns used in distributed processing.

These are useful for parallel processing of noisy or corrupted images when anonymous functions are not supported (e.g. multiprocessing)

See also: Dataset.minibatch() for parallel processing of batches of images for downloading, loading, resizing, cropping, augmenting, tensor prep etc.

Expand source code Browse git

class Transform():
    """Transforms are static methods that implement common transformation patterns used in distributed processing.  

       These are useful for parallel processing of noisy or corrupted images when anonymous functions are not supported (e.g. multiprocessing)
 
       See also: `vipy.dataset.Dataset.minibatch` for parallel processing of batches of images for downloading, loading, resizing, cropping, augmenting, tensor prep etc.

       The best-effort helpers below catch `Exception` only, so KeyboardInterrupt and SystemExit always propagate to the caller.
    """

    @staticmethod
    def load(im):
        """Return a loaded clone of `im`, or `im.flush()` if cloning or loading raises"""
        try:
            return im.clone().load()
        except Exception:
            return im.flush()

    @staticmethod
    def download(im):
        """Return a downloaded clone of `im`, or `im.flush()` if cloning or downloading raises"""
        try:
            return im.clone().download()
        except Exception:
            return im.flush()

    @staticmethod
    def is_loaded(im):
        """Return the result of im.is_loaded()"""
        return im.is_loaded()

    @staticmethod
    def mindim(im, mindim=256):
        """Return a loaded clone of `im` passed through .mindim(mindim), or `im.flush()` on error"""
        try:
            return im.clone().load().mindim(mindim)
        except Exception:
            return im.flush()

    @staticmethod
    def thumbnail(im, mindim=64, outfile=None):
        """Load a clone, apply .mindim(mindim) and save it to `outfile` (default: a random '.jpg' name from tocache); returns `im.flush()` on error"""
        try:
            return im.clone().load().mindim(mindim).save(outfile if outfile else tocache(shortuuid(8)+'.jpg'))
        except Exception:
            return im.flush()

    @staticmethod
    def saveas(im, filename):
        """Load a clone and call .saveas(filename) on it; returns `im.flush()` on error"""
        try:
            return im.clone().load().saveas(filename)
        except Exception:
            return im.flush()

    @staticmethod
    def annotate(im, mindim=64, outfile=None):
        """Load a clone, apply .mindim(mindim).annotate() and save it to `outfile` (default: a random '.jpg' name from tocache); returns `im.flush()` on error"""
        try:
            return im.clone().load().mindim(mindim).annotate().save(outfile if outfile else tocache(shortuuid(8)+'.jpg'))
        except Exception:
            return im.flush()

    @staticmethod
    def centersquare_32x32_normalized(im):
        """If `im` is not loaded, return clone -> load -> rgb -> centersquare -> resize(32,32) -> gain(1/255); otherwise return `im` unchanged"""
        if im.loaded():
            return im
        return im.clone().load().rgb().centersquare().resize(32, 32).gain(1/255)

    @staticmethod
    def centersquare_32x32_lum_normalized(im):
        """If `im` is not loaded, return clone -> load -> centersquare -> lum -> resize(32,32) -> gain(1/255); otherwise return `im` unchanged"""
        if im.loaded():
            return im
        return im.clone().load().centersquare().lum().resize(32, 32).gain(1/255)

    @staticmethod
    def centersquare_256x256_normalized(im):
        """If `im` is not loaded, return clone -> load -> rgb -> centersquare -> resize(256,256) -> gain(1/255); otherwise return `im` unchanged"""
        if im.loaded():
            return im
        return im.clone().load().rgb().centersquare().resize(256, 256).gain(1/255)

    @staticmethod
    def mindim256_normalized(im):
        """If `im` is not loaded, return clone -> load -> rgb -> mindim(256) -> gain(1/255); otherwise return `im` unchanged"""
        if im.loaded():
            return im
        return im.clone().load().rgb().mindim(256).gain(1/255)

    @staticmethod
    def tensor(image, shape=None, gain=None, mindim=None, colorspace=None, centersquare=None, tensor=None, ignore_errors=False, jitter=None, num_augmentations=None):
        """Apply a configurable preprocessing pipeline to a clone of `image`.

        Steps run in this order, each only when its argument is set:
        colorspace ('lum', 'rgb' or 'float'), jitter ('randomcrop' via vipy.noise.randomcrop),
        centersquare, resize(*shape), mindim(mindim), gain(gain), torch().

        If `num_augmentations` is truthy, the pipeline is re-run num_augmentations+1 times on `image`
        (without the `tensor` step), the resulting arrays are stacked along axis 3, and a clone of
        `image` carrying that stacked array is returned instead.

        Returns:
            The transformed image, or None if an exception was raised and `ignore_errors` is true.

        Raises:
            Any exception from the pipeline when `ignore_errors` is false.  KeyboardInterrupt and
            SystemExit are never suppressed.
        """
        try:
            im = image.clone()
            if colorspace == 'lum':
                im = im.lum()
            if colorspace == 'rgb':
                im = im.rgb()
            if colorspace == 'float':
                im = im.float()
            if jitter == 'randomcrop':
                import vipy.noise
                im = vipy.noise.randomcrop(im)
            if centersquare:
                im = im.centersquare()
            if shape is not None:
                im = im.resize(*shape)
            if mindim:
                im = im.mindim(mindim)
            if gain is not None:
                im = im.gain(gain)
            if tensor:
                im = im.torch()  # CHW
            if num_augmentations:
                augmentations = np.stack([np.atleast_3d(Transform.tensor(image, shape=shape, gain=gain, mindim=mindim, colorspace=colorspace, centersquare=centersquare, ignore_errors=ignore_errors, jitter=jitter).array())
                                          for _ in range(num_augmentations+1)], axis=3)  # +1 for mean
                return image.clone().array(augmentations)  # packed nd-array, use im.torch('NCHW') to access

            return im

        except Exception:
            # Exception (not BaseException) so that Ctrl-C and interpreter exit are never swallowed
            if not ignore_errors:
                raise
            return None

    @staticmethod
    def to_tensor(**kwargs):
        """Return functools.partial(Transform.tensor, **kwargs), a picklable callable taking the image as its argument"""
        return functools.partial(Transform.tensor, **kwargs)

    @staticmethod
    def is_transformed(im):
        """Return True if `im` is not None (Transform.tensor returns None for ignored failures)"""
        return im is not None

Static methods

def annotate(im, mindim=64, outfile=None)

Expand source code Browse git

@staticmethod
def annotate(im, mindim=64, outfile=None):
    """Load a clone of `im`, apply .mindim(mindim).annotate() and save it.

    The output path is `outfile`, or a random '.jpg' name from tocache() when `outfile` is falsy.
    Returns the result of save(), or `im.flush()` if any step raises an Exception.
    KeyboardInterrupt and SystemExit propagate.
    """
    try:
        return im.clone().load().mindim(mindim).annotate().save(outfile if outfile else tocache(shortuuid(8)+'.jpg'))
    except Exception:
        return im.flush()

def centersquare_256x256_normalized(im)

Expand source code Browse git

@staticmethod
def centersquare_256x256_normalized(im):
    """Return `im` unchanged if already loaded, else clone -> load -> rgb -> centersquare -> resize(256,256) -> gain(1/255)"""
    if im.loaded():
        return im
    return im.clone().load().rgb().centersquare().resize(256, 256).gain(1/255)

def centersquare_32x32_lum_normalized(im)

Expand source code Browse git

@staticmethod
def centersquare_32x32_lum_normalized(im):
    """Return `im` unchanged if already loaded, else clone -> load -> centersquare -> lum -> resize(32,32) -> gain(1/255)"""
    if im.loaded():
        return im
    return im.clone().load().centersquare().lum().resize(32, 32).gain(1/255)

def centersquare_32x32_normalized(im)

Expand source code Browse git

@staticmethod
def centersquare_32x32_normalized(im):
    """Return `im` unchanged if already loaded, else clone -> load -> rgb -> centersquare -> resize(32,32) -> gain(1/255)"""
    if im.loaded():
        return im
    return im.clone().load().rgb().centersquare().resize(32, 32).gain(1/255)

def download(im)

Expand source code Browse git

@staticmethod
def download(im):
    """Return a downloaded clone of `im`, or `im.flush()` if cloning or downloading raises an Exception.

    KeyboardInterrupt and SystemExit propagate.
    """
    try:
        return im.clone().download()
    except Exception:
        return im.flush()

def is_loaded(im)

Expand source code Browse git

@staticmethod
def is_loaded(im):
    """Return the result of im.is_loaded(); a named function usable where anonymous functions are not supported"""
    return im.is_loaded()

def is_transformed(im)

Expand source code Browse git

@staticmethod
def is_transformed(im):
    """Return True if `im` is not None"""
    return im is not None

def load(im)

Expand source code Browse git

@staticmethod
def load(im):
    """Return a loaded clone of `im`, or `im.flush()` if cloning or loading raises an Exception.

    KeyboardInterrupt and SystemExit propagate.
    """
    try:
        return im.clone().load()
    except Exception:
        return im.flush()

def mindim(im, mindim=256)

Expand source code Browse git

@staticmethod
def mindim(im, mindim=256):
    """Return a loaded clone of `im` passed through .mindim(mindim), or `im.flush()` if any step raises an Exception.

    KeyboardInterrupt and SystemExit propagate.
    """
    try:
        return im.clone().load().mindim(mindim)
    except Exception:
        return im.flush()

def mindim256_normalized(im)

Expand source code Browse git

@staticmethod
def mindim256_normalized(im):
    """Return `im` unchanged if already loaded, else clone -> load -> rgb -> mindim(256) -> gain(1/255)"""
    if im.loaded():
        return im
    return im.clone().load().rgb().mindim(256).gain(1/255)

def saveas(im, filename)

Expand source code Browse git

@staticmethod
def saveas(im, filename):
    """Load a clone of `im` and return .saveas(filename) on it, or `im.flush()` if any step raises an Exception.

    KeyboardInterrupt and SystemExit propagate.
    """
    try:
        return im.clone().load().saveas(filename)
    except Exception:
        return im.flush()

def tensor(image, shape=None, gain=None, mindim=None, colorspace=None, centersquare=None, tensor=None, ignore_errors=False, jitter=None, num_augmentations=None)

Expand source code Browse git

@staticmethod
def tensor(image, shape=None, gain=None, mindim=None, colorspace=None, centersquare=None, tensor=None, ignore_errors=False, jitter=None, num_augmentations=None):
    """Apply a configurable preprocessing pipeline to a clone of `image`.

    Steps run in this order, each only when its argument is set:
    colorspace ('lum', 'rgb' or 'float'), jitter ('randomcrop' via vipy.noise.randomcrop),
    centersquare, resize(*shape), mindim(mindim), gain(gain), torch().

    If `num_augmentations` is truthy, the pipeline is re-run num_augmentations+1 times on `image`
    (without the `tensor` step), the resulting arrays are stacked along axis 3, and a clone of
    `image` carrying that stacked array is returned instead.

    Returns:
        The transformed image, or None if an exception was raised and `ignore_errors` is true.

    Raises:
        Any exception from the pipeline when `ignore_errors` is false.  KeyboardInterrupt and
        SystemExit are never suppressed.
    """
    try:
        im = image.clone()
        if colorspace == 'lum':
            im = im.lum()
        if colorspace == 'rgb':
            im = im.rgb()
        if colorspace == 'float':
            im = im.float()
        if jitter == 'randomcrop':
            import vipy.noise
            im = vipy.noise.randomcrop(im)
        if centersquare:
            im = im.centersquare()
        if shape is not None:
            im = im.resize(*shape)
        if mindim:
            im = im.mindim(mindim)
        if gain is not None:
            im = im.gain(gain)
        if tensor:
            im = im.torch()  # CHW
        if num_augmentations:
            augmentations = np.stack([np.atleast_3d(Transform.tensor(image, shape=shape, gain=gain, mindim=mindim, colorspace=colorspace, centersquare=centersquare, ignore_errors=ignore_errors, jitter=jitter).array())
                                      for _ in range(num_augmentations+1)], axis=3)  # +1 for mean
            return image.clone().array(augmentations)  # packed nd-array, use im.torch('NCHW') to access

        return im

    except Exception:
        # Exception (not BaseException) so that Ctrl-C and interpreter exit are never swallowed
        if not ignore_errors:
            raise
        return None

def thumbnail(im, mindim=64, outfile=None)

Expand source code Browse git

@staticmethod
def thumbnail(im, mindim=64, outfile=None):
    """Load a clone of `im`, apply .mindim(mindim) and save it.

    The output path is `outfile`, or a random '.jpg' name from tocache() when `outfile` is falsy.
    Returns the result of save(), or `im.flush()` if any step raises an Exception.
    KeyboardInterrupt and SystemExit propagate.
    """
    try:
        return im.clone().load().mindim(mindim).save(outfile if outfile else tocache(shortuuid(8)+'.jpg'))
    except Exception:
        return im.flush()

def to_tensor(**kwargs)

Expand source code Browse git

@staticmethod
def to_tensor(**kwargs):
    """Return functools.partial(Transform.tensor, **kwargs): a callable that applies Transform.tensor with these keyword arguments pre-bound"""
    return functools.partial(Transform.tensor, **kwargs)