#!/home/task_175767359472903/conda-bld/geoparse_1757673838305/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehol/bin/python
"""Download GSM/GSE as SRA or FASTQ files."""
import sys
import os
import json
import argparse
import GEOparse


def parse(args):
    """Parse the command-line arguments of the download script.

    Args:
        args: List of argument strings to parse, e.g. ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``term``, ``email``,
        ``outdir``, ``filetype``, ``jobs`` (int), ``keep_sra`` (bool),
        ``filter_by`` (list of str, or None when the option is absent) and
        ``filter_operator`` ("or" or "and").
    """
    p = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter
    )

    p.add_argument(
        "term",
        help="GEO accession, could be series (GSE) or sample (GSM)",
        metavar="GEOID",
    )
    p.add_argument(
        "email", help="e-mail address for Entrez API", metavar="EMAIL"
    )
    p.add_argument(
        "-o",
        "--outdir",
        dest="outdir",
        help="output directory, defaults to '.'",
        metavar="DIR",
        default="./",
    )
    p.add_argument(
        "-f",
        "--filetype",
        dest="filetype",
        help="filetype to download, defaults to 'fastq'",
        default="fastq",
        choices=("sra", "fastq"),
    )
    p.add_argument(
        "-j",
        "--jobs",
        help="number of threads/jobs for the fastq dump",
        metavar="NRTHREADS",
        # Without an explicit type the value stays a string, which cannot be
        # compared against an integer by the caller.
        type=int,
        default=1,
    )
    p.add_argument(
        "-k",
        "--keep-sra",
        dest="keep_sra",
        default=False,
        action="store_true",
        help="keep sra files",
    )
    p.add_argument(
        "-fb",
        "--filter-by",
        dest="filter_by",
        nargs="*",
        required=False,
        help="filters to apply",
    )
    p.add_argument(
        "-fo",
        "--filter-operator",
        dest="filter_operator",
        default="or",
        choices=("or", "and"),
        # The default value must live inside the string; a bare ``or`` here
        # would be evaluated as the Python operator and truncate the help.
        help=(
            "logical operator on how to connect multiple filters, "
            "defaults to 'or'"
        ),
    )
    return p.parse_args(args)


if __name__ == "__main__":
    # Script entry point: parse the CLI, fetch the GEO record and download
    # its SRA/FASTQ files, then print the downloaded paths as JSON.
    args = parse(sys.argv[1:])

    # A job count supplied on the command line may arrive as a string;
    # normalise it once so the comparison and ``nproc`` below get an int.
    jobs = int(args.jobs)

    def _filterby(g):
        """Return whether ``g`` passes the user-supplied filters.

        A filter matches when it is a substring of the first entry of
        ``g.metadata["title"]`` or of ``g.name``. The individual matches
        are combined with ``any`` ("or") or ``all`` ("and") according to
        ``args.filter_operator``.
        """
        matches = (
            (f in g.metadata["title"][0] or f in g.name)
            for f in args.filter_by
        )
        if args.filter_operator == "or":
            return any(matches)
        if args.filter_operator == "and":
            return all(matches)
        raise Exception(
            "No such a logical operator %s" % args.filter_operator
        )

    if not args.term.startswith(("GSM", "GSE")):
        raise RuntimeError("only GSM and GSE ids work for now!")
    geo_object = GEOparse.get_GEO(args.term)
    download_options = {
        "filetype": args.filetype,
        "keep_sra": args.keep_sra,
        "silent": jobs > 1,
    }
    # Only forward a filter callback when filters were actually requested,
    # so the unfiltered call stays the same as before.
    if args.filter_by is not None:
        download_options["filterby"] = _filterby
    downloaded_files = geo_object.download_SRA(
        args.email,
        directory=args.outdir,
        # Aspera is enabled only when ASPERA_HOME is set in the environment.
        aspera="ASPERA_HOME" in os.environ,
        nproc=jobs,
        **download_options,
    )
    print("{}".format(json.dumps(downloaded_files, indent=4)))
