#!/usr/bin/env python

"""transform a bedfile, A, into another bed file, B,
that covers the whole genome, where
B's elements that are in A will have a score = average
of all A's features that cover
that position
"""

import sys
import subprocess
import argparse
import logging
import tempfile
from itertools import imap, ifilter, chain

# Configure the root logger at INFO level and keep a handle to it; the
# helper functions in this file log through this module-level ``log``.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()

# Emitted unconditionally every time this file is loaded.
log.warning("more testing needed ...")

# Refuse to be imported: when this file is loaded under any name other than
# '__main__', log an error and exit with status 1.
if __name__ != '__main__':
    log.error("this is a script do not import")
    sys.exit(1)


def send_to_tmpfile(outlines, parse_l, filter_l, out_frm, task_name="output"):
    """Filter, parse and format records into a persistent temporary file.

    Each item of ``outlines`` that passes ``filter_l`` is handed to
    ``parse_l``; the parsed value is formatted with ``out_frm`` and written
    as one line of the temporary file.

    :param outlines: iterable of raw records
    :param parse_l: callable turning a raw record into a parsed record
    :param filter_l: predicate applied to raw records (before parsing)
    :param out_frm: callable turning a parsed record into the output line
    :param task_name: label used only in the log message
    :return: path of the temporary file (created with ``delete=False``, so
        it is not removed when closed)
    """
    tmp = tempfile.NamedTemporaryFile(delete=False)
    with tmp:
        log.info("%s -> %s", task_name, tmp.name)
        kept = ifilter(filter_l, outlines)
        for record in imap(parse_l, kept):
            print >>tmp, out_frm(record)
    return tmp.name


def chsizes2bed(fn):
    """Convert a chromosome-sizes file into a temporary 3-column bed file.

    Every non-blank line of ``fn`` must hold exactly two tab-separated
    fields (name, size); it becomes the line ``name<tab>0<tab>size``.
    Lines are sorted as plain strings before being written.

    :param fn: path of the chromosome sizes file
    :return: path of the temporary file written by ``send_to_tmpfile``
    """
    def split_fields(line):
        return line.strip().split("\t")

    def non_blank(line):
        return len(line.strip())

    def as_bed3(fields):
        # Exactly two fields are expected; anything else raises ValueError.
        chrom, size = fields
        return "\t".join((chrom, "0", size))

    with open(fn) as fd:
        ordered = sorted(fd)
    return send_to_tmpfile(ordered, split_fields, non_blank,
                           as_bed3, "chrom_sizes")


def merge(bed_fn):
    """Merge the features of ``bed_fn`` with bedtools, averaging scores.

    Runs ``bedtools merge -scores mean -i bed_fn`` and rewrites every
    non-blank output line (expected to hold exactly four tab-separated
    fields: chrom, start, end, score) as a 5-column line
    ``chrom, start, end, pk<i>, score``, where ``i`` is the index of the
    line in the tool's output.

    The command is passed as an argument list without a shell, so file
    names containing spaces or shell metacharacters reach bedtools intact
    and cannot be interpreted as shell syntax.

    :param bed_fn: path of the input bed file
        (NOTE(review): bedtools merge presumably needs sorted input --
        confirm against the installed bedtools version)
    :return: path of the temporary file written by ``send_to_tmpfile``
    :raises subprocess.CalledProcessError: if bedtools exits non-zero
    :raises OSError: if the bedtools executable cannot be started
    """
    def parse_merged(item):
        i, line = item
        return (i, line.rstrip().split("\t"))

    def filter_merged(item):
        _, line = item
        return len(line.strip())

    def out_frm(item):
        i, (ch, st, en, sc) = item
        return "\t".join((ch, st, en, "pk%d" % i, sc))

    cmd = ["bedtools", "merge", "-scores", "mean", "-i", bed_fn]
    outl = subprocess.check_output(cmd).split("\n")
    return send_to_tmpfile(enumerate(outl), parse_merged,
                           filter_merged, out_frm, "bed_ones")


def bed_diff(f1, f2):
    """Write the bedops difference of ``f1`` and ``f2`` as zero-score features.

    Runs ``bedops --ec -d f1 f2`` and rewrites every non-blank output line
    (expected to hold exactly three tab-separated fields: chrom, start,
    end) as a 5-column line ``chrom, start, end, pk<i>, 0.0``, where ``i``
    is the index of the line in the tool's output.

    The command is passed as an argument list without a shell, so file
    names containing spaces or shell metacharacters reach bedops intact
    and cannot be interpreted as shell syntax.

    :param f1: path of the bed file to subtract from
    :param f2: path of the bed file whose regions are removed
    :return: path of the temporary file written by ``send_to_tmpfile``
    :raises subprocess.CalledProcessError: if bedops exits non-zero
    :raises OSError: if the bedops executable cannot be started
    """
    def parse_merged(item):
        i, line = item
        return (i, line.rstrip().split("\t"))

    def filter_merged(item):
        _, line = item
        return len(line.strip())

    def out_frm(item):
        i, (ch, st, en) = item
        return "\t".join((ch, st, en, "pk%d" % i, "0.0"))

    cmd = ["bedops", "--ec", "-d", f1, f2]
    outl = subprocess.check_output(cmd).split("\n")
    return send_to_tmpfile(enumerate(outl), parse_merged,
                           filter_merged, out_frm, "bed_zeros")


def sort_k(l):
    """Return the sort key ``(chrom, start, end)`` of a 5-column bed line.

    The line must hold exactly five tab-separated fields; start and end
    are compared as integers, the chromosome name as a string.
    """
    fields = l.strip().split("\t")
    chrom, start, end, _name, _score = fields
    return chrom, int(start), int(end)


# Command-line interface: two positional arguments, the input bed file and
# the chromosome sizes table. The module docstring doubles as the help text.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    epilog="Taylor Lab (odenas@emory.edu)")
parser.add_argument("input", type=str, help="Bed file in input (>= BED5)")
parser.add_argument("chsizes", type=str,
                    help="Chromosome sizes (chname<tab>chsize)")

opt = parser.parse_args()

# Pipeline: merged/averaged input features, a bed file spanning every
# chromosome, and the part of the latter not covered by the former
# (written with score 0.0).
# NOTE(review): the three temporary files are created with delete=False and
# are left on disk (their paths are logged) -- confirm this is intended.
bed_ones = merge(opt.input)
whole_genome = chsizes2bed(opt.chsizes)
bed_zeros = bed_diff(whole_genome, bed_ones)

# Read both intermediate files under ``with`` so the handles are closed as
# soon as the lines have been collected and sorted by (chrom, start, end).
with open(bed_ones) as ones_fd, open(bed_zeros) as zeros_fd:
    outl = sorted(chain(ones_fd, zeros_fd), key=sort_k)

# Lines already carry their trailing newline; write them out unchanged.
sys.stdout.writelines(outl)
