"""
Wrappers around some of the bedops (http://code.google.com/p/bedops/) utilities.
"""


import subprocess
import re
from itertools import ifilter
from functools import partial
from operator import attrgetter
import numpy as np
from bx.intervals.intersection import Interval, IntervalTree
from bx.intervals.io import GenomicIntervalReader, NiceReaderWrapper

from dimer.genome import parseBED
from dimer import ops

# Executable name of the bedops `bedmap` tool.
BEDMAP = "bedmap"

class GenomeIntervalTree(object):
    """Chromosome-aware wrapper over bx.intervals.intersection.IntervalTree.

    Internally this is a dict (``self.trset``) that maps each chromosome to
    its own IntervalTree. Elements must be in the format
    (chrom, start, end, ...).
    """

    def __init__(self, data=()):
        """Build the tree set from ``data``.

        :param data: iterable of tuples of the type
            (chrom, int start, int end, ...). Defaults to an empty
            (immutable) tuple, so no state is shared between instances.
        """

        self.trset = {}
        for elem in data:
            self.add_el(elem)

    def add_el(self, elem):
        """Add an element; the whole tuple is stored as the interval value.

        :param tuple elem: tuple of the type (chrom, int start, int end, ...)
        :rtype: None"""

        self.add(elem[0], elem[1], elem[2], elem)

    def add(self, chrom, start, end, val=None):
        """Add an element specified by the given coordinates.

        :param str chrom: chromosome
        :param int start: start
        :param int end: end
        :param val: payload stored on the Interval (None by default)
        :rtype: None"""

        # Create the per-chromosome tree only on first use, rather than
        # building a throwaway IntervalTree on every call.
        if chrom not in self.trset:
            self.trset[chrom] = IntervalTree()
        self.trset[chrom].add_interval(Interval(start, end, val))

    def find(self, chrom, start, end):
        """Find the stored elements that intersect the given coordinates.

        :param str chrom: chromosome
        :param int start: start
        :param int end: end
        :rtype: whatever IntervalTree.find returns for that chromosome
            (the intersecting elements in the tree), or an empty list when
            nothing was ever added for ``chrom``"""

        tree = self.trset.get(chrom)
        if tree is None:
            return []
        return tree.find(start, end)

    def find_el(self, elem):
        """Find the stored elements that intersect ``elem``.

        :param tuple elem: tuple of the type (chrom, int start, int end, ...)
        :rtype: list of intersecting elements in the tree"""

        return self.find(elem[0], elem[1], elem[2])


def bedoverlap(refdata, mapdata, parse=parseBED, onlyOverlapping=True):
    """Traverse the coordinates in refdata, while collecting
       overlapping elements from mapdata.

    mapdata is loaded completely into a GenomeIntervalTree first; refdata is
    then read line by line, so one result is produced per refdata line
    (unless filtered by ``onlyOverlapping``). Both files are closed
    deterministically: mapdata right after indexing, refdata once the
    iterator is exhausted or closed.

    :param str refdata: path of the reference intervals. output is with
            respect to these. I.e., unless there is filtering, output has one
            item for each refdata line
    :param str mapdata: path of the qualifying (e.g, within a range of a
            refdata element) intervals to look up.
    :param callable parse: applied to every line of both files; it must
            return a tuple starting with (chrom, start, end)
    :param bool onlyOverlapping: when true, refdata elements without any
            overlapping mapdata element are skipped
    :rtype: iterator over (ref_element, [overlapping map_element, ...]) pairs
    """

    ## read mapdata into an IntervalTree; the file is closed once indexed
    with open(mapdata) as map_fh:
        gtree = GenomeIntervalTree(parse(line) for line in map_fh)

    ## fetch from gtree (mapdata) all intervals that
    ## intersect an element of refdata
    with open(refdata) as ref_fh:
        for line in ref_fh:
            elem = parse(line)
            hits = gtree.find_el(elem)
            if onlyOverlapping and not hits:
                continue
            # each hit stores the parsed mapdata element as its value
            yield (elem, [hit.value for hit in hits])


def bedmap(refdata, mapdata, ops=("--echo", "--echo-map"),
           parsePatt="([01])[|](.+)[|](.*)", onlyOverlapping=True):
    """run bedmap on the given files and return an iterator over the results. to run this,
    you need to have installed bedops (Shane Neph, M. Scott Kuehn, Alex P. Reynolds, et al.
    BEDOPS: high-performance genomic feature operations Bioinformatics (2012) 28 (14): 1919-1920.).
    Try to run `bedmap` from a terminal to check that it is available.

    The executed command is: bedmap --ec --indicator ops refdata mapdata

    :param str refdata: reference intervals. output is with respect to these. I.e.,
            unless there is filtering, output has one line for each refdata line
    :param str mapdata: qualifying (e.g, within a range of a refdata element)
            intervals on which to apply operations.
    :param list ops: operations (see bedmap help)
    :param parsePatt: a regex object or pattern string used to parse each
              output line of bedmap. Make this grouped, to get a tuple of
              groups. Notice that the first group of this should be "([01])"
              (the value written because of --indicator); it is dropped
              from the yielded tuples
    :param bool onlyOverlapping: when true, only output lines whose first
              character is '1' are parsed and yielded
    :rtype: iterator over tuples holding the groups of parsePatt after the first
    :raises ValueError: if an output line does not match parsePatt
    :raises subprocess.CalledProcessError: if bedmap exits with a non-zero status
    """

    cmd = (BEDMAP, "--ec", "--indicator") + tuple(ops) + (refdata, mapdata)
    # universal_newlines makes check_output return text rather than bytes on
    # Python 3, so that splitting on "\n" keeps working there.
    output = subprocess.check_output(cmd, universal_newlines=True)
    # accepts both pattern strings and already compiled regex objects
    pattern = re.compile(parsePatt)
    for line in output.split("\n"):
        if not line:
            continue
        if onlyOverlapping and line[0] != '1':
            continue
        match = pattern.match(line)
        if match is None:
            raise ValueError(
                "bedmap output line does not match parsePatt: %r" % (line,))
        yield match.groups()[1:]


def overlap_as_array(anchor, feat_lst, bin_size=1, bin_f=np.sum, dtype=np.float64, respect_strand=True):
    """convert an anchor and a list of overlapping features into a signal array

    A zero array spanning the anchor (anchor[2] - anchor[1] positions) is
    created, the score of every feature is added over the positions the
    feature covers, and the result is passed through ops.binned_signal.

    :param tuple anchor: (chrom, start, end, ...); a 6th field holding the
        strand is required when respect_strand is true
    :param list feat_lst: [(chrom, start, end, name, score), ...]; empty
        (falsy) entries are ignored
    :param int bin_size: bin size handed to ops.binned_signal
    :param callable bin_f: how to summarize bins (handed to ops.binned_signal)
    :param numpy.dtype dtype: type of the un-binned signal array
    :param bool respect_strand: whether to mirror (w.r.t middle of anchor) the
        binned signal for anchors on the negative strand (anchor[5] == "-")
    :raises ValueError: if respect_strand is true and anchor has fewer than
        6 fields

    returns the binned signal computed from an array of length
    anchor[2] - anchor[1]
    """

    # Fail fast, before doing any work, when the strand field is missing.
    if respect_strand and len(anchor) < 6:
        raise ValueError("respect_strand on >BED6 anchors.")

    anchor_start, anchor_end = anchor[1], anchor[2]
    signal = np.zeros((anchor_end - anchor_start,), dtype=dtype)
    for ft in feat_lst:
        if not ft:
            continue
        (_chrom, ft_start, ft_end, _name, score) = ft
        # Offsets relative to the anchor start, clamped at 0 so features that
        # begin upstream of the anchor do not turn into negative indices;
        # offsets past the end are truncated by the slice itself.
        lo = max(ft_start - anchor_start, 0)
        hi = max(ft_end - anchor_start, 0)
        signal[lo:hi] += score

    binned_x = ops.binned_signal(signal, bin_size, bin_f)
    if respect_strand and anchor[5] == "-":
        # Reverse along the first axis. The copy gives a fresh array instead
        # of a negatively strided view.
        binned_x = binned_x[::-1].copy()
    return binned_x


# Running this module as a script is a no-op.
if __name__ == "__main__":
    pass
