#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
It labels the candidate list of fusion genes generated by 'find_fusion_genes.py' with ZNF mosaic.



Author: Daniel Nicorici, Daniel.Nicorici@gmail.com

Copyright (c) 2009-2022 Daniel Nicorici

This file is part of FusionCatcher.

FusionCatcher is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

FusionCatcher is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with FusionCatcher (see file 'COPYING.txt').  If not, see
<http://www.gnu.org/licenses/>.

By default, FusionCatcher is running BLAT aligner
<http://users.soe.ucsc.edu/~kent/src/> but it offers also the option to disable
all its scripts which make use of BLAT aligner if you choose explicitly to do so.
BLAT's license does not allow to be used for commercial activities. If BLAT
license does not allow to be used in your case then you may still use
FusionCatcher by forcing not use the BLAT aligner by specifying the option
'--skip-blat'. For more information regarding BLAT please see its license.

Please, note that FusionCatcher does not require BLAT in order to find
candidate fusion genes!

This file is not running/executing/using BLAT.

"""
import sys
import os
import optparse

def is_znf_gene(x):
    """Tell whether a gene symbol looks like 'ZNF<number>'.

    The symbol must start with the literal prefix "ZNF" and the remainder
    must be non-empty and parseable by int() (e.g. "ZNF123"). Symbols with
    a non-numeric tail (e.g. "ZNF705A"), the bare prefix "ZNF", and
    empty/None values are rejected.

    Parameters:
        x: gene symbol as a string; may be empty or None.

    Returns:
        True if the symbol matches the pattern described above, else False.
    """
    if not (x and x.startswith("ZNF")):
        return False

    suffix = x[3:]
    if not suffix:
        # bare "ZNF" without any number
        return False

    try:
        int(suffix)
    except ValueError:
        # only a failed numeric parse means "not a ZNF gene"; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed here
        return False
    return True


if __name__ == '__main__':
    # Script entry point: read the tab-delimited list of candidate fusion
    # genes, add the given label to every row where both gene symbols are
    # ZNF genes (see is_znf_gene), and write the rows back sorted by
    # descending count, then by the first and second columns.

    # command line parsing

    usage = "%prog [options]"
    description = """It labels the candidate list of fusion genes generated by 'find_fusion_genes.py'."""
    version = "%prog 0.10 beta"

    parser = optparse.OptionParser(usage=usage, description=description, version=version)

    parser.add_option("--input",
                      action="store",
                      type="string",
                      dest="input_fusion_genes_filename",
                      help="""The input file in text tab delimited format containing the fusion genes candidates produced by 'find_fusion_genes.py'. """)

    parser.add_option("--label",
                      action="store",
                      type="string",
                      dest="label",
                      help="""Label used to mark the candidate fusion genes which are founf in the filter.""")

    parser.add_option("--output_fusion_genes",
                      action="store",
                      type="string",
                      dest="output_fusion_genes_filename",
                      help="""The output text tab-separated file containing the candidate fusion genes which are found in the filter. The format is as the input file and sorted by counts column.""")

    (options, args) = parser.parse_args()

    # validate options: all three are mandatory
    if not (options.input_fusion_genes_filename and
            options.output_fusion_genes_filename and
            options.label
            ):
        parser.print_help()
        # parser.error() prints the message and terminates the process
        # itself, so nothing after it would ever run
        parser.error("One of the options has not been specified.")

    # single-argument form prints the same text as the print statement did
    print("Reading... %s" % (options.input_fusion_genes_filename,))
    # Assume format:
    #Fusion_gene_1	Fusion_gene_2	Count_paired-end_reads	Fusion_gene_symbol_1	Fusion_gene_symbol_2
    #ENSG00000175110	ENSG00000233830	1	MRPS22
    #ENSG00000205246	ENSG00000233924	1	RPSAP18,RPSAP8,RPSAP58
    #ENSG00000103222	ENSG00000116857	1	ABCC1	TMEM9
    #...
    # Blank lines are skipped; the handle is closed as soon as reading ends.
    with open(options.input_fusion_genes_filename, 'r') as fin:
        data = [line.rstrip('\r\n').split('\t') for line in fin if line.rstrip('\r\n')]
    header = data.pop(0)

    # add the labels on column no. 6: when the input has exactly 5 columns a
    # new 'Fusion_description' column is appended, otherwise the last
    # existing column is treated as the description column and extended
    label_col = False
    if len(header) == 5:
        label_col = True
        header.append('Fusion_description')
    temp = []
    label = options.label

    for line in data:
        # columns 4 and 5 hold the symbols of the two fusion partners
        if is_znf_gene(line[3]) and is_znf_gene(line[4]):
            if label_col:
                temp.append(line + [label])
            elif line[-1]:
                # keep the labels already present and append the new one
                temp.append(line[:-1] + [','.join([line[-1], label])])
            else:
                temp.append(line[:-1] + [label])
        elif label_col:
            # keep the table rectangular for unlabeled rows
            temp.append(line + [''])
        else:
            temp.append(line)

    # highest count first; ties broken by the first two columns
    data = sorted(temp, key=lambda x: (-int(x[2]), x[0], x[1]))
    data.insert(0, header)
    # the context manager guarantees the output is flushed and closed
    with open(options.output_fusion_genes_filename, 'w') as fout:
        fout.writelines(['\t'.join(line) + '\n' for line in data])

    print("The end.")
