'''
common operations on numpy arrays
'''

import logging
import warnings

import numpy as np


# module-level logger, named after this module
lg = logging.getLogger(__name__)


def fit(x, axis=None, nans=0.0):
    """scale values of x in the interval [-1, 1]

    the maximum along `axis` is mapped to 1, the minimum to -1 and the
    mid-point between them to 0. a slice whose values are all equal has a
    zero range and yields NaNs, which are replaced with `nans`.

    :param ndarray x: data to fit
    :param axis: passed to numpy.max and numpy.min. ``None`` scales w.r.t.
        the whole array
    :param float nans: if nan values are produced replace them with this
    :rtype: scaled values of x (a new float array of the same shape as x)"""

    # keepdims keeps the reduced axes as size-1 dimensions, so that the
    # statistics broadcast against `x` along the axis they were computed on
    # (without it any axis other than 0 is mis-aligned)
    upper = x.max(axis=axis, keepdims=True)
    lower = x.min(axis=axis, keepdims=True)
    mid_p = (upper + lower) / 2.0
    rng = upper - lower

    # a zero range gives 0/0; that is expected and handled just below
    with np.errstate(divide='ignore', invalid='ignore'):
        fitx = 2.0 * (x - mid_p) / rng
    fitx[np.isnan(fitx)] = nans

    smallest = np.min(fitx)
    largest = np.max(fitx)
    if smallest < min(nans, -1):
        lg.warning("data not in [-1, 1]. min(data)=%f", smallest)
    if largest > max(nans, 1):
        lg.warning("data not in [-1, 1]. max(data)=%f", largest)

    return fitx


def standardize(x, axis=None, nans=0.0):
    """transform each component of `x` to 0 mean and 1 std

    mean and variance can be applied along an axis if specified. E.g.,
    if axis=0 mean & variance is computed w.r.t. to that axis and those
    are applied to the points. if axis=None, normalization is w.r.t to the
    whole array.

    :param ndarray x: input array
    :param axis: axis (an int or a tuple of ints) along which the mean and
        the std are computed. ``None`` uses the whole array
    :param float nans: if nan values are produced (e.g., by a zero std)
        replace them with this
    :rtype: a triple (standardized_x, mean(x), std(x)). the mean and the std
        have the shape of `x` with the reduced axes removed
    """

    # keepdims leaves the reduced axes in place with size 1, so the
    # statistics broadcast against `x` without any manual reshaping
    mean_b = x.mean(axis=axis, keepdims=True)
    std_b = x.std(axis=axis, keepdims=True)

    # a zero std gives 0/0; that is expected and handled just below
    with np.errstate(divide='ignore', invalid='ignore'):
        normX = (x - mean_b) / std_b
    normX[np.isnan(normX)] = nans

    # drop the size-1 axes again for the returned statistics
    return normX, np.squeeze(mean_b, axis=axis), np.squeeze(std_b, axis=axis)


def binned_signal(x, bin_size, bin_f):
    """summarize consecutive, non-overlapping bins of a signal

    :param ndarray x: a 1-dimensional signal
    :param int bin_size: number of samples per bin. it must divide the size
        of x, otherwise ValueError is raised
    :param callable bin_f: summarizes the bins. it is called with a 2d array
        holding one bin per row and with ``axis=1``
    :rtype: a 1-d array with one entry per bin"""

    assert x.ndim == 1

    n_samples = x.shape[0]
    n_bins, leftover = divmod(n_samples, bin_size)
    if leftover:
        msg = "bin_size (%d) doesn't divide |x| (%d)" % (bin_size, n_samples)
        raise ValueError(msg)

    # one row per bin, then collapse every row with the caller's function
    per_bin = x.reshape((-1, bin_size))
    binned = bin_f(per_bin, axis=1)
    assert binned.shape == (n_bins, )
    return binned

def get_valid_idx(xsg):
    """find array row-positions i, s.t., no arrays from xsg have NaNs
    at those rows.

    :param list xsg: a list of 2d-ndarrays of the same size
    :rtype: 1-d ndarray of bools with one entry per array row. True marks
        the rows that are NaN-free in every array of xsg
    """

    ## each element of xsg has some rows (anchors) with NaN values. find out which
    na_indicator = [np.isnan(x).any(axis=1) for x in xsg]
    ## make union of those elements
    na_indicator_u = np.logical_or.reduce(na_indicator, axis=0)
    ## we want to keep those without NaN
    res = np.logical_not(na_indicator_u)
    lg.info("%d/%d valid entries", np.count_nonzero(res), xsg[0].shape[0])
    return res


def inverse_hsine(x):
    """inverse hyperbolic sine transform, log(x + sqrt(x**2 + 1))

    see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3340533/

    :param ndarray x: input signal
    :rtype: ndarray
    """

    # numpy.arcsinh is the same function. evaluating the formula literally
    # loses all precision for large negative x, where x + sqrt(x**2 + 1)
    # rounds to 0 and the log becomes -inf
    return np.arcsinh(x)


## thanks to sklearn *** NOT USED FOR NOW ***
def warn_if_not_float(X, estimator='This algorithm'):
    """Warning utility function to check that data type is floating point

    :param X: object with a numpy ``dtype`` attribute (e.g., an ndarray)
    :param estimator: name used in the warning message. if it is not a
        string, the name of its class is used instead
    """
    # `basestring` only exists on Python 2; fall back to `str` on Python 3
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    if not isinstance(estimator, string_types):
        estimator = estimator.__class__.__name__
    if X.dtype.kind != 'f':
        warnings.warn("%s assumes floating point values as input, "
                      "got %s" % (estimator, X.dtype))


def _mean_and_std(X, axis=0, with_mean=True, with_std=True):
    """Mean and standard deviation of `X` along `axis`.

    Either statistic is returned as None when its flag is off. A standard
    deviation of exactly zero is replaced with 1.0, so that dividing by it
    does not produce NaNs.
    """
    # bring the requested axis to the front; both reductions then run
    # over axis 0
    rolled = np.rollaxis(np.asarray(X), axis)

    mean_ = rolled.mean(axis=0) if with_mean else None

    if not with_std:
        return mean_, None

    std_ = rolled.std(axis=0)
    if isinstance(std_, np.ndarray):
        std_[std_ == 0.0] = 1.0
    elif std_ == 0.:
        # the reduction collapsed to a single scalar
        std_ = 1.
    return mean_, std_


def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
    """Standardize a dataset along any axis

    Center to the mean and component wise scale to unit variance.
    This implementation will refuse to center scipy.sparse matrices
    since it would make them non-sparse and would potentially crash the
    program with memory exhaustion problems.

    Instead the caller is expected to either set explicitly
    `with_mean=False` (in that case, only variance scaling will be
    performed on the features of the CSR matrix) or to call `X.toarray()`
    if he/she expects the materialized dense array to fit in memory.

    To avoid memory copy the caller should pass a CSR matrix.

    :param ndarray X: array-like or CSR matrix.
        The data to center and scale.

    :param int axis: (0 by default)
        axis used to compute the means and standard deviations along. If 0,
        independently standardize each feature, otherwise (if 1) standardize
        each sample.

    :param bool with_mean: True by default
        If True, center the data before scaling.

    :param bool with_std: True by default
        If True, scale the data to unit variance (or equivalently,
        unit standard deviation).

    :param bool copy: optional, default is True
        set to False to perform inplace row normalization and avoid a
        copy (if the input is already a numpy array or a scipy.sparse
        CSR matrix and if axis is 1).

    :rtype: the scaled data: an ndarray for dense input, a CSR matrix for
        sparse input
    :raises ValueError: for sparse input when `with_mean` is True or when
        `axis` is not 0
    """
    # scipy is only needed to recognize sparse input; without it every
    # input is handled as dense
    try:
        import scipy.sparse as sp
    except ImportError:
        sp = None

    if sp is not None and sp.issparse(X):
        if with_mean:
            raise ValueError(
                "Cannot center sparse matrices: pass `with_mean=False` instead"
                " See docstring for motivation and alternatives.")
        if axis != 0:
            raise ValueError("Can only scale sparse matrix on axis=0, "
                             " got axis=%d" % axis)
        warn_if_not_float(X, estimator='The scale function')
        if X.format != 'csr':
            # the conversion already yields a new matrix, no extra copy needed
            X = X.tocsr()
            copy = False
        if copy:
            X = X.copy()
        if with_std:
            # column variance as E[x^2] - E[x]^2, computed without densifying
            col_mean = np.asarray(X.mean(axis=0)).ravel()
            col_mean_sq = np.asarray(X.multiply(X).mean(axis=0)).ravel()
            # rounding can push a zero variance slightly below zero
            var = np.maximum(col_mean_sq - col_mean ** 2, 0.0)
            var[var == 0.0] = 1.0
            # X.indices holds the column of every stored value, so this
            # scales each column in place
            X.data *= (1 / np.sqrt(var)).take(X.indices)
    else:
        X = np.asarray(X)
        warn_if_not_float(X, estimator='The scale function')
        mean_, std_ = _mean_and_std(
            X, axis, with_mean=with_mean, with_std=with_std)
        if copy:
            X = X.copy()
        # Xr is a view on the original array that enables easy use of
        # broadcasting on the axis in which we are interested in
        Xr = np.rollaxis(X, axis)
        if with_mean:
            Xr -= mean_
        if with_std:
            Xr /= std_
    return X

def binning_effect(X, bin_size):
    """histogram of the number of non-zero rows per bin of columns

    the columns of `X` are split into consecutive bins of `bin_size`
    columns. within a bin every row is summed, and the rows with a non-zero
    sum are counted. entry k of the result is the number of bins in which
    exactly k rows had a non-zero sum.

    :param ndarray X: a 2-dimensional array
    :param int bin_size: number of columns per bin. it must divide the
        number of columns of X, otherwise ValueError is raised
    :rtype: a 1-d integer array of shape (X.shape[0] + 1, )
    """
    assert len(X.shape) == 2
    n_rows, n_cols = X.shape
    if n_cols % bin_size:
        raise ValueError("bin_size (%d) doesn't divide |x| (%d)" % (bin_size,
                                                                    n_cols))
    # a bin can have between 0 and n_rows non-zero rows
    Hb = np.zeros((1 + n_rows,), dtype=int)
    for i in range(n_cols // bin_size):
        start_idx = i * bin_size
        end_idx = start_idx + bin_size
        binned_slice = X[:, start_idx:end_idx].sum(axis=1)
        Hb[np.count_nonzero(binned_slice)] += 1
    return Hb
