#!/usr/bin/env python

"""Summary of an archive contents. Support:
    + dataset
    - training info
    - weight info
    - input representation
"""

import argparse
import logging
import random

from dimer import archive
from dimer.data import AnchorDataset

# Configure the root logger so INFO-level (and higher) messages are emitted.
logging.basicConfig(level=logging.INFO)
# Handle on the root logger (getLogger() with no name returns the root).
log = logging.getLogger()

if __name__ == '__main__':
    # Command-line entry point: load a dataset from an archive, draw a random
    # sample of its rows and print descriptive statistics for the labels
    # (when present), the targets and the inputs.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        epilog="Olgert Denas (Taylor Lab)")
    parser.add_argument("dataset", type=archive.dset_path,
                        help="Dataset. " + archive.DSPEC_MSG)
    parser.add_argument("--sample", type=int, default=20,
                        help="Show info about a sample of the input of this size.")
    opt = parser.parse_args()

    # random.sample() rejects negative sizes with a bare ValueError; report
    # the problem as a regular usage error instead of a traceback.
    if opt.sample < 0:
        parser.error("--sample must be a non-negative integer")

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by AnchorDataset._from_archive, which is not visible here.
    ds = AnchorDataset._from_archive(opt.dataset, False)

    # Pick at most `--sample` distinct row positions, never more than the
    # number of rows available along the first axis of X.
    n_rows = ds.X.shape[0]
    idx = random.sample(range(n_rows), min(n_rows, opt.sample))

    # Compare against None explicitly: truth-testing a pandas DataFrame
    # raises "ValueError: The truth value of a DataFrame is ambiguous".
    # NOTE(review): assumes dfT is None when the archive has no labels -- confirm.
    labels = ds.dfT
    if labels is not None:
        print("%s: Labels" % opt.dataset)
        print(labels.iloc[idx].describe())

    # Single-argument print(...) behaves identically under Python 2 and 3.
    print("%s: Y" % opt.dataset)
    print(ds.sY.iloc[idx].describe())

    # For X, first sum the sampled rows column-wise (axis=0) and then
    # describe the distribution of those per-column totals.
    print("%s: X" % opt.dataset)
    print(ds.pX.iloc[idx].sum(axis=0).describe())
