Source code for mdptoolbox.util

# -*- coding: utf-8 -*-
"""Markov Decision Process (MDP) Toolbox: ``util`` module
======================================================

The ``util`` module provides functions to check that an MDP is validly
described. There are also functions for working with MDPs while they are being
solved.

Available functions
-------------------

:func:`~mdptoolbox.util.check`
    Check that an MDP is properly defined
:func:`~mdptoolbox.util.checkSquareStochastic`
    Check that a matrix is square and stochastic
:func:`~mdptoolbox.util.getSpan`
    Calculate the span of an array
:func:`~mdptoolbox.util.isNonNegative`
    Check if a matrix has only non-negative elements
:func:`~mdptoolbox.util.isSquare`
    Check if a matrix is square
:func:`~mdptoolbox.util.isStochastic`
    Check if a matrix is row stochastic

"""

# Copyright (c) 2011-2015 Steven A. W. Cordwell
# Copyright (c) 2009 INRA
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions and the following disclaimer in the documentation
#     and/or other materials provided with the distribution.
#   * Neither the name of the <ORGANIZATION> nor the names of its contributors
#     may be used to endorse or promote products derived from this software
#     without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import numpy as _np

import mdptoolbox.error as _error

_MDPERR = {
"mat_nonneg" :
    "Transition probabilities must be non-negative.",
"mat_square" :
    "A transition probability matrix must be square, with dimensions S×S.",
"mat_stoch" :
    "Each row of a transition probability matrix must sum to one (1).",
"obj_shape" :
    "Object arrays for transition probabilities and rewards "
    "must have only 1 dimension: the number of actions A. Each element of "
    "the object array contains an SxS ndarray or matrix.",
"obj_square" :
    "Each element of an object array for transition "
    "probabilities and rewards must contain an SxS ndarray or matrix; i.e. "
    "P[a].shape = (S, S) or R[a].shape = (S, S).",
"P_type" :
    "The transition probabilities must be in a numpy array; "
    "i.e. type(P) is ndarray.",
"P_shape" :
    "The transition probability array must have the shape "
    "(A, S, S)  with S : number of states greater than 0 and A : number of "
    "actions greater than 0. i.e. R.shape = (A, S, S)",
"PR_incompat" :
    "Incompatibility between P and R dimensions.",
"R_type" :
    "The rewards must be in a numpy array; i.e. type(R) is "
    "ndarray, or numpy matrix; i.e. type(R) is matrix.",
"R_shape" :
    "The reward matrix R must be an array of shape (A, S, S) or "
    "(S, A) with S : number of states greater than 0 and A : number of "
    "actions greater than 0. i.e. R.shape = (S, A) or (A, S, S)."
}


def _checkDimensionsListLike(arrays):
    """Check that each array in a list of arrays has the same size.

    """
    dim1 = len(arrays)
    dim2, dim3 = arrays[0].shape
    for aa in range(1, dim1):
        dim2_aa, dim3_aa = arrays[aa].shape
        if (dim2_aa != dim2) or (dim3_aa != dim3):
            raise _error.InvalidError(_MDPERR["obj_square"])
    return dim1, dim2, dim3


def _checkRewardsListLike(reward, n_actions, n_states):
    """Check that a list-like reward input is valid.

    """
    try:
        lenR = len(reward)
        if lenR == n_actions:
            dim1, dim2, dim3 = _checkDimensionsListLike(reward)
        elif lenR == n_states:
            dim1 = n_actions
            dim2 = dim3 = lenR
        else:
            raise _error.InvalidError(_MDPERR["R_shape"])
    except AttributeError:
        raise _error.InvalidError(_MDPERR["R_shape"])
    return dim1, dim2, dim3


[docs]def isSquare(matrix): """Check that ``matrix`` is square. Returns ======= is_square : bool ``True`` if ``matrix`` is square, ``False`` otherwise. """ try: try: dim1, dim2 = matrix.shape except AttributeError: dim1, dim2 = _np.array(matrix).shape except ValueError: return False if dim1 == dim2: return True return False
[docs]def isStochastic(matrix): """Check that ``matrix`` is row stochastic. Returns ======= is_stochastic : bool ``True`` if ``matrix`` is row stochastic, ``False`` otherwise. """ try: absdiff = (_np.abs(matrix.sum(axis=1) - _np.ones(matrix.shape[0]))) except AttributeError: matrix = _np.array(matrix) absdiff = (_np.abs(matrix.sum(axis=1) - _np.ones(matrix.shape[0]))) return (absdiff.max() <= 10*_np.spacing(_np.float64(1)))
[docs]def isNonNegative(matrix): """Check that ``matrix`` is row non-negative. Returns ======= is_stochastic : bool ``True`` if ``matrix`` is non-negative, ``False`` otherwise. """ try: if (matrix >= 0).all(): return True except (NotImplementedError, AttributeError, TypeError): try: if (matrix.data >= 0).all(): return True except AttributeError: matrix = _np.array(matrix) if (matrix.data >= 0).all(): return True return False
[docs]def checkSquareStochastic(matrix): """Check if ``matrix`` is a square and row-stochastic. To pass the check the following conditions must be met: * The matrix should be square, so the number of columns equals the number of rows. * The matrix should be row-stochastic so the rows should sum to one. * Each value in the matrix must be positive. If the check does not pass then a mdptoolbox.util.Invalid Arguments --------- ``matrix`` : numpy.ndarray, scipy.sparse.*_matrix A two dimensional array (matrix). Notes ----- Returns None if no error has been detected, else it raises an error. """ if not isSquare(matrix): raise _error.SquareError if not isStochastic(matrix): raise _error.StochasticError if not isNonNegative(matrix): raise _error.NonNegativeError
[docs]def check(P, R): """Check if ``P`` and ``R`` define a valid Markov Decision Process (MDP). Let ``S`` = number of states, ``A`` = number of actions. Arguments --------- P : array The transition matrices. It can be a three dimensional array with a shape of (A, S, S). It can also be a one dimensional arraye with a shape of (A, ), where each element contains a matrix of shape (S, S) which can possibly be sparse. R : array The reward matrix. It can be a three dimensional array with a shape of (S, A, A). It can also be a one dimensional array with a shape of (A, ), where each element contains matrix with a shape of (S, S) which can possibly be sparse. It can also be an array with a shape of (S, A) which can possibly be sparse. Notes ----- Raises an error if ``P`` and ``R`` do not define a MDP. Examples -------- >>> import mdptoolbox, mdptoolbox.example >>> P_valid, R_valid = mdptoolbox.example.rand(100, 5) >>> mdptoolbox.util.check(P_valid, R_valid) # Nothing should happen >>> >>> import numpy as np >>> P_invalid = np.random.rand(5, 100, 100) >>> mdptoolbox.util.check(P_invalid, R_valid) # Raises an exception Traceback (most recent call last): ... StochasticError:... """ # Checking P try: if P.ndim == 3: aP, sP0, sP1 = P.shape elif P.ndim == 1: aP, sP0, sP1 = _checkDimensionsListLike(P) else: raise _error.InvalidError(_MDPERR["P_shape"]) except AttributeError: try: aP, sP0, sP1 = _checkDimensionsListLike(P) except AttributeError: raise _error.InvalidError(_MDPERR["P_shape"]) msg = "" if aP <= 0: msg = "The number of actions in P must be greater than 0." elif sP0 <= 0: msg = "The number of states in P must be greater than 0." if msg: raise _error.InvalidError(msg) # Checking R try: ndimR = R.ndim if ndimR == 1: aR, sR0, sR1 = _checkRewardsListLike(R, aP, sP0) elif ndimR == 2: sR0, aR = R.shape sR1 = sR0 elif ndimR == 3: aR, sR0, sR1 = R.shape else: raise _error.InvalidError(_MDPERR["R_shape"]) except AttributeError: aR, sR0, sR1 = _checkRewardsListLike(R, aP, sP0) msg = "" if sR0 <= 0: msg = "The number of states in R must be greater than 0." elif aR <= 0: msg = "The number of actions in R must be greater than 0." elif sR0 != sR1: msg = "The matrix R must be square with respect to states." elif sP0 != sR0: msg = "The number of states must agree in P and R." elif aP != aR: msg = "The number of actions must agree in P and R." if msg: raise _error.InvalidError(msg) # Check that the P's are square, stochastic and non-negative for aa in range(aP): checkSquareStochastic(P[aa])
[docs]def getSpan(array): """Return the span of `array` span(array) = max array(s) - min array(s) """ return array.max() - array.min()