#!/usr/bin/env python

"""Bin the components of metalabels by applying np.digitize. Metalabels are
expected to contain numbers or `nan`. In the latter case the output metalabel
will be replaced with a string of your choice.
"""

import sys
import logging
import argparse
import numpy as np
from itertools import imap
from operator import itemgetter
from dimer.argutils import ArgTypes, parser_inst
from dimer.genome import frepr


# Send INFO-and-above messages through the root logger; `lg` is the handle
# used for all diagnostics in this script.
logging.basicConfig(level=logging.INFO)
lg = logging.getLogger()


# This file is a command-line entry point only: importing it logs an error
# and terminates the interpreter with exit status 1.
if not __name__ == '__main__':
    lg.error("this is a script do not import")
    raise SystemExit(1)

# Command-line interface. The module docstring doubles as the parser
# description.
parser = parser_inst(__doc__)
parser.add_argument("input", type=ArgTypes.is_file, help="Input data")
# Kept as a string here: the value is either one of the keywords below or a
# number, and the numeric conversion happens after parsing.
parser.add_argument("--thr", type=str, default="auto",
                    help=("Binarizing threshold: a number, or one of "
                          "auto, max, norm. "
                          "auto is 0.5 after fitting values to [0, 1]")
                    )
parser.add_argument("--suff", type=str, default="=",
                    help="Suffix to metalabels")
parser.add_argument("--nan_symb", type=str, default="n",
                    help=("if any component of a metalabel is a `nan`, "
                          "substitute with this."))
parser.add_argument("--keep", nargs="+", type=ArgTypes.is_uint, default=[],
                    help=("Keep the labels with these indexes. "
                          "Indexes are 0-based. "
                          "Keeps all by default."))
# `store_true` already defaults to False, so no explicit default is needed.
parser.add_argument("--ec", action='store_true',
                    help="Check format of the metalabel")

# Parse the command line, then choose the constructor used to turn each input
# line into a RawFrep: `from_line_ec` when --ec was given, `from_line`
# otherwise.
opt = parser.parse_args()
rf_factory = (frepr.RawFrep.from_line_ec if opt.ec
              else frepr.RawFrep.from_line)

# Resolve the threshold. The three keywords are passed through as strings;
# anything else must parse as a number (float() raises ValueError otherwise).
# This step is deliberately separate from the dispatch below so that a numeric
# threshold still reaches the processing code.
if opt.thr not in ("auto", "max", "norm"):
    opt.thr = float(opt.thr)

if opt.thr == "norm":
    # Median-centred binning, done in two passes over the input file:
    #   1. load every metalabel vector to compute per-column medians;
    #   2. re-read the records and emit them with binned metalabels.
    lg.info("computing mean/sd ...")
    with open(opt.input) as fd:
        X = np.array([rf.arr_metalab for rf in imap(rf_factory, fd)],
                     dtype=float)

    nan_mask = np.isnan(X)
    bins = None
    # Bins are only needed (and only computable) when at least one value is
    # not NaN; `all()` is also True for empty input, which skips this too.
    if not nan_mask.all():
        # np.ma.median honours the mask, so NaN entries do not poison the
        # median of their column. Columns that are entirely NaN get a NaN
        # median, which leaves them NaN after centring.
        medians = np.ma.filled(
            np.ma.median(np.ma.masked_array(X, nan_mask), axis=0), np.nan)
        X = X - medians
        lowest = np.ma.masked_array(X, nan_mask).min()
        # Sanity check: after centring, the smallest value cannot exceed 0.
        assert lowest <= 0, str(lowest)
        # Two bin edges: centred values below 1e-6 map to 0 and the rest to 1
        # once the digitize index is shifted down by one.
        bins = [lowest, 0.000001]

    with open(opt.input) as fd:
        for i, rf in enumerate(imap(rf_factory, fd)):
            if np.any(np.isnan(X[i])):
                # A single NaN component blanks out the whole metalabel.
                x = [opt.nan_symb] * rf.nunits
            else:
                x = np.digitize(X[i], bins) - 1
            newl = frepr.RawFrep._mtlb_sep.join(map(str, x))
            # keep_units returns the filtered record; use its result so that
            # --keep is honoured here as it is in the branch below.
            new_repr = rf._replace(metalab=newl).keep_units(opt.keep)
            print(new_repr.to_raw() + opt.suff)
else:
    # Numeric, "auto" and "max" thresholds are delegated to
    # RawFrep.binarized. Lines are parsed with the factory selected by --ec.
    with open(opt.input) as fd:
        for raw_repr in imap(rf_factory, fd):
            kept = raw_repr.keep_units(opt.keep)
            new_repr = kept.binarized(opt.thr, opt.nan_symb)
            print(new_repr.to_raw() + opt.suff)
