#!/opt/conda/conda-bld/zol_1760213924798/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold/bin/python

"""
Program: zol-scape
Author: Rauf Salamzade
Kalan Lab
UW Madison, Department of Medical Microbiology and Immunology
"""

# BSD 3-Clause License
#
# Copyright (c) 2023-2025, Kalan-Lab
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met: 
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, 
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


from collections import defaultdict
from zol.util import get_excel_columns
from rich_argparse import RawTextRichHelpFormatter
from time import sleep
from zol import util
import argparse
import multiprocessing
import os
import pandas as pd
import sys

# Set OMP_NUM_THREADS to "1" in this process's environment as soon as the
# module is loaded, before any argument parsing or analysis runs.
# NOTE(review): presumably this keeps OpenMP-backed libraries from
# oversubscribing cores while several zol jobs run side by side - confirm.
os.environ["OMP_NUM_THREADS"] = "1"
def create_parser():
    """
    Define the zol-scape command-line interface and parse the arguments
    given to the current process.

    Returns:
        argparse.Namespace with the attributes big_scape_input,
        big_scape_results, outdir, zol_parameters, print_mode, threads and
        jobs.
    """
    program_description = """
    Program: zol-scape
    Author: Rauf Salamzade
    Affiliation: Kalan Lab, UW Madison, Department of Medical Microbiology and
        Immunology

    zolscape is a wrapper which runs zol for each GCF identified by BiG-SCAPE analysis to
    complement CORASON analysis.

    You can run from start to finish as such (will run 10 zol jobs at once, each using
    4 threads): 

    zol-scape -i Input_Folder_to_BiG-SCAPE/ -r Result_Folder_to_BiG-SCAPE/ \\
              -o ZOL-SCAPE_Results/ -j 10 -c 4

    If you are have access to an HPC and would like to parallelize using that - you
    can use the --print-mode option to create a task file which lists individual zol
    commands per line which can be parallelized on an HPC via a job array: 

    zol-scape -i Input_Folder_to_BiG-SCAPE/ -r Result_Folder_to_BiG-SCAPE/ \\
              -o ZOL-SCAPE_Results/ -p -c 4

    Then, after you run the printed list of zol commands, to create a finalized
    consolidated tsv/xlsx of zol results across all GCFs, simply run the command
    again, same as before: 

    zol-scape -i Input_Folder_to_BiG-SCAPE/ -r Result_Folder_to_BiG-SCAPE/ \\
              -o ZOL-SCAPE_Results/ -p -c 4
    """
    cli = argparse.ArgumentParser(
        description=program_description,
        formatter_class=RawTextRichHelpFormatter,
    )

    # Required locations: BiG-SCAPE input, BiG-SCAPE results, and our output.
    cli.add_argument(
        '-i', '--big-scape-input',
        required=True,
        help="Path to folder which was input into BiG-SCAPE.",
    )
    cli.add_argument(
        '-r', '--big-scape-results',
        required=True,
        help="Path to BiG-SCAPE results directory - will be searched recursively for 'clustering' files.",
    )
    cli.add_argument(
        '-o', '--outdir',
        required=True,
        help="Output directory.",
    )

    # Optional behaviour switches.
    cli.add_argument(
        '-z', '--zol-parameters',
        default="",
        help="Parameters to pass to zol - please surround by double quotes [Default is ''].",
    )
    print_mode_help = (
        "Print zol commands - one per line - and exit - to allow for\n"
        "parallelizing on an HPC using a job array."
    )
    cli.add_argument(
        '-p', '--print-mode',
        action='store_true',
        help=print_mode_help,
    )

    # Parallelism controls.
    cli.add_argument(
        '-c', '--threads',
        type=int,
        default=1,
        help="The number of threads to use per zol job [Default is 1].",
    )
    cli.add_argument(
        '-j', '--jobs',
        type=int,
        default=1,
        help="The number of parallel zol jobs to run at once [Default is 1].",
    )

    return cli.parse_args()


def _find_bgc_genbanks(bigscape_input_dir):
    """Return a dict mapping BGC id -> path for every '*.gbk' under the input dir.

    The BGC id is the file name with its final extension removed. If the same
    id is seen more than once, the last file visited by os.walk wins.
    """
    bgc_ids_to_paths = {}
    for currentpath, _folders, files in os.walk(bigscape_input_dir):
        for file in files:
            if file.endswith('.gbk'):
                bgc_id = '.'.join(file.split('.')[:-1])
                bgc_ids_to_paths[bgc_id] = os.path.join(currentpath, file)
    return bgc_ids_to_paths


def _parse_gcf_clusterings(bigscape_results_dir):
    """Return {clustering file path: {GCF id: [BGC ids]}} from '*_clustering_*.tsv' files.

    Lines starting with '#' and blank lines are skipped; every other line is
    expected to hold exactly two tab-separated fields (BGC id, GCF id).
    """
    cluster_gcf_bgcs = defaultdict(lambda: defaultdict(list))
    for currentpath, _folders, files in os.walk(bigscape_results_dir):
        for file in files:
            if '_clustering_' in file and file.endswith('.tsv'):
                # Plain concatenation is kept on purpose: this exact string is
                # later written to the 'Clustering File Path' output column.
                cluster_file = currentpath + '/' + file
                with open(cluster_file) as ocf:
                    for line in ocf:
                        if line.startswith('#'):
                            continue
                        line = line.strip()
                        if not line:
                            # A blank (e.g. trailing) line would otherwise
                            # break the two-way unpacking below.
                            continue
                        bgc_id, gcf_id = line.split('\t')
                        cluster_gcf_bgcs[cluster_file][gcf_id].append(bgc_id)
    return cluster_gcf_bgcs


def _build_zol_commands(cluster_gcf_bgcs, bgc_ids_to_paths, zol_results_dir,
                        zol_parameters, threads, log_object):
    """Build one zol command (list of str) per GCF with at least one GenBank file.

    BGCs without a GenBank file and GCFs with no usable GenBank files are
    reported on stderr and in the log; they do not abort the run.
    """
    zol_cmds = []
    for i, cluster in enumerate(sorted(cluster_gcf_bgcs)):
        for gcf, bgc_ids in cluster_gcf_bgcs[cluster].items():
            output_dir = zol_results_dir + 'ClusterFile_' + str(i) + '_' + gcf + '/'
            input_gbks = []
            for bgc in bgc_ids:
                if bgc in bgc_ids_to_paths:
                    input_gbks.append(bgc_ids_to_paths[bgc])
                else:
                    msg = f'Warning: unable to find GenBank for BGC {bgc}'
                    sys.stderr.write(msg + '\n')
                    log_object.warning(msg)
            bgc_ids_in_gcf = len(bgc_ids)
            bgc_gbks_found = len(input_gbks)
            if bgc_gbks_found != bgc_ids_in_gcf:
                msg = f'For clustering results {cluster} for GCF {gcf},\n'
                msg += f'{bgc_ids_in_gcf} BGCs were reported and {bgc_gbks_found} BGC GenBank files were found.\n'
                msg += 'Missing BGC GenBank files could be because they are MIBiG reference\n'
                msg += 'BGCs if BiG-SCAPE was run using the --mibig arugment or --include-singletons.'
                sys.stderr.write(msg + '\n')
                log_object.warning(msg)
            if input_gbks:
                zol_cmds.append(['zol', '-c', str(threads), zol_parameters,
                                 '-o', output_dir, '-i'] + input_gbks)
            else:
                msg = f'Warning: no BGC GenBanks found for GCF {gcf} in clustering results file {cluster}'
                sys.stderr.write(msg + '\n')
                log_object.warning(msg)
    return zol_cmds


def _selection_analysis_requested(zol_parameters):
    """Return True if the zol parameters appear to request selection analysis.

    NOTE(review): this is a substring test kept from the original code, so any
    zol option that merely contains '-s' also matches - confirm that is intended.
    """
    return '-s' in zol_parameters or '--selection-analysis' in zol_parameters


def _consolidated_header(zol_parameters):
    """Return the column names of the consolidated TSV for the given zol parameters."""
    # Whole-token matching also recognises the flag when it is the first, last
    # or only parameter (the previous ' -dom ' test needed a space on both sides).
    zol_tokens = zol_parameters.split()
    domain_mode = '-dom' in zol_tokens or '--domain-mode' in zol_tokens

    header = [
        'Clustering File Path',
        'GCF ID',
        'Ortholog Group (OG) ID',
        'OG is Single Copy?',
        'Proportion of Total Gene Clusters with OG',
        'OG Median Length (bp)',
    ]
    if domain_mode:
        header.append('Single-Linkage Full Protein Cluster')
    header += [
        'OG Consensus Order',
        'OG Consensus Direction',
        'Custom Annotation (E-value)',
        'PGAP Annotation (E-value)',
        'Pfam Domains',
    ]

    evolutionary_stats_header = [
        'Tajima\'s D',
        'Proportion of Filtered Codon Alignment is Segregating Sites',
        'Entropy',
        'Upstream Region Entropy',
        'Median Beta-RD-gc',
        'Max Beta-RD-gc',
        'Proportion of sites which are highly ambiguous in codon alignment',
        'Proportion of sites which are highly ambiguous in trimmed codon alignment',
        'Median GC',
        'Median GC Skew',
    ]
    peptide_header = [
        'BGC score (GECCO weights)',
        'Viral score (V-Score)',
        'Hydrophobicity Mean',
        'Hydrophobicity Std Dev',
        'Aliphatic Index Mean',
        'Aliphatic Index Std Dev',
        'm/z Mean',
        'm/z Std Dev',
    ]
    hyphy_header = []
    if _selection_analysis_requested(zol_parameters):
        hyphy_header = [
            'GARD Partitions Based on Recombination Breakpoints',
            'Number of Sites Identified as Under Positive or Negative Selection by FUBAR',
            'Average delta(Beta, Alpha) by FUBAR across sites',
            'Proportion of Sites Under Selection which are Positive',
            'P-value for gene-wide episodic selection by BUSTED',
        ]
    annotation_header = [
        'KO Annotation (E-value)',
        'PaperBLAST Annotation (E-value)',
        'CARD Annotation (E-value)',
        'IS Finder (E-value)',
        'MIBiG Annotation (E-value)',
        'VOG Annotation (E-value)',
        'VFDB Annotation (E-value)',
    ]
    return (header + evolutionary_stats_header + peptide_header + hyphy_header
            + annotation_header + ['CDS Locus Tags', 'OG Consensus Sequence'])


def _write_consolidated_table(consolidated_table_file, zol_sheet_header,
                              cluster_gcf_bgcs, zol_results_dir):
    """Concatenate per-GCF zol reports into one TSV and return the line count.

    Each data row is the per-GCF report row with the clustering file path and
    the GCF id prepended. GCFs without a report file are silently skipped. The
    returned count includes the header line.
    """
    num_rows = 1
    with open(consolidated_table_file, 'w') as zctf_handle:
        zctf_handle.write('\t'.join(zol_sheet_header) + '\n')
        for i, cluster in enumerate(sorted(cluster_gcf_bgcs)):
            for gcf in cluster_gcf_bgcs[cluster]:
                gcf_result_file = os.path.join(
                    zol_results_dir, 'ClusterFile_' + str(i) + '_' + gcf,
                    'Final_Results', 'Consolidated_Report.tsv')
                if not os.path.isfile(gcf_result_file):
                    continue
                with open(gcf_result_file) as ogrf:
                    for j, line in enumerate(ogrf):
                        if j == 0:
                            # skip the per-GCF header line
                            continue
                        row = [cluster, gcf] + line.strip().split('\t')
                        zctf_handle.write('\t'.join(row) + '\n')
                        num_rows += 1
    return num_rows


def _color_scale_options(colors, values):
    """Build conditional-format options for a 2- or 3-color scale.

    `colors` and `values` are parallel sequences ordered min, (mid,) max; all
    anchors use the 'num' type.
    """
    anchors = ('min', 'max') if len(colors) == 2 else ('min', 'mid', 'max')
    options = {'type': f'{len(colors)}_color_scale'}
    for anchor, color, value in zip(anchors, colors, values):
        options[f'{anchor}_color'] = color
        options[f'{anchor}_value'] = value
        options[f'{anchor}_type'] = 'num'
    return options


def _write_spreadsheet(zr_data, zol_spreadsheet_file, num_rows, selection_mode):
    """Write `zr_data` to a formatted XLSX file.

    `num_rows` is the number of rows including the header row and bounds every
    conditional-format range; `selection_mode` enables the rules for the
    selection-analysis columns.
    """
    # The context manager closes the pandas writer (and thereby the workbook)
    # even if formatting raises part-way through.
    with pd.ExcelWriter(zol_spreadsheet_file, engine='xlsxwriter') as writer:
        workbook = writer.book

        warn_format = workbook.add_format(
            {"bg_color": "#bf241f", "bold": True, "font_color": "#FFFFFF", "border": 1, "border_color": "#DCDCDC"}
        )
        na_format = workbook.add_format(
            {"font_color": "#a6a6a6", "bg_color": "#FFFFFF", "italic": True, "border": 1, "border_color": "#DCDCDC"}
        )
        header_format = workbook.add_format(
            {
                "bold": True,
                "text_wrap": False,
                "valign": "top",
                "fg_color": "#FFFFFF",
                "font_color": "#000000",
                "border": 1,
                "border_color": "#DCDCDC",
            }
        )
        gecco_format = workbook.add_format(
            {"bg_color": "#d5abde", "border": 1, "border_color": "#DCDCDC"}
        )

        zr_data.to_excel(writer,
                         sheet_name='zol-scape Results',
                         index=False,
                         na_rep="NA")
        zr_sheet = writer.sheets['zol-scape Results']

        border_format = workbook.add_format({'border': 1, 'border_color': '#DCDCDC'})
        zr_sheet.set_column(0, len(zr_data.columns) - 1, None, border_format)

        # Re-write the header cells so they carry the header format.
        for col_num, col_name in enumerate(zr_data.columns):
            zr_sheet.write(0, col_num, col_name, header_format)

        excel_cols = get_excel_columns()
        col_map = {col: excel_cols[i] for i, col in enumerate(zr_data.columns)}

        # Grey out every "NA" cell in the data area.
        zr_sheet.conditional_format(
            f"A2:{excel_cols[len(zr_data.columns) - 1]}{num_rows}",
            {"type": "cell", "criteria": "==", "value": '"NA"', "format": na_format},
        )

        scale = _color_scale_options
        white = "#FFFFFF"
        ambiguous_full = 'Proportion of sites which are highly ambiguous in codon alignment'
        ambiguous_trimmed = 'Proportion of sites which are highly ambiguous in trimmed codon alignment'

        # Each rule is (first column, last column, options); a rule is applied
        # only when both columns exist. Rules are applied in list order.
        rules = [
            ('OG is Single Copy?', 'OG is Single Copy?',
             {"type": "cell", "criteria": "==", "value": '"False"', "format": warn_format}),
            ('Proportion of Total Gene Clusters with OG', 'Proportion of Total Gene Clusters with OG',
             scale((white, "#CCCCCC"), (0.0, 1.0))),
            ('OG Median Length (bp)', 'OG Median Length (bp)',
             scale((white, "#5A8AC6"), (100, 2500))),
            ("Tajima's D", "Tajima's D",
             scale(("#E6B0AA", white, "#B8CCE4"), (-2.0, 0.0, 2.0))),
            ('Entropy', 'Entropy',
             scale((white, "#F86B6B"), (0.0, 1.0))),
            ('Upstream Region Entropy', 'Upstream Region Entropy',
             scale((white, "#F86B6B"), (0.0, 1.0))),
            ('Median Beta-RD-gc', 'Max Beta-RD-gc',
             scale(("#F8696B", "#FFEB84", "#63BE7B"), (0.75, 1.0, 1.25))),
            (ambiguous_full, ambiguous_trimmed,
             scale((white, "#E6B0AA"), (0.0, 1.0))),
        ]
        if selection_mode:
            # NOTE(review): the BUSTED p-value column is not in the numeric
            # column set used when loading the table, so its scale may never
            # apply - confirm against util.load_table_in_panda_data_frame.
            rules += [
                ('GARD Partitions Based on Recombination Breakpoints',
                 'GARD Partitions Based on Recombination Breakpoints',
                 scale((white, "#F8696B"), (1, 5))),
                ('Number of Sites Identified as Under Positive or Negative Selection by FUBAR',
                 'Number of Sites Identified as Under Positive or Negative Selection by FUBAR',
                 scale((white, "#B8D4B8"), (0, 10))),
                ('Average delta(Beta, Alpha) by FUBAR across sites',
                 'Average delta(Beta, Alpha) by FUBAR across sites',
                 scale(("#E6B0AA", white, "#B8D4B8"), (-5, 0, 5))),
                ('Proportion of Sites Under Selection which are Positive',
                 'Proportion of Sites Under Selection which are Positive',
                 scale(("#E6B0AA", white, "#B8D4B8"), (0, 0.5, 1))),
                ('P-value for gene-wide episodic selection by BUSTED',
                 'P-value for gene-wide episodic selection by BUSTED',
                 scale(("#63BE7B", white), (0, 0.05))),
            ]
        rules += [
            ('Median GC', 'Median GC',
             scale((white, "#63BE7B"), (0.0, 1.0))),
            ('Median GC Skew', 'Median GC Skew',
             scale(("#E8D5D5", white, "#D5E8F5"), (-0.2, 0.0, 0.2))),
            ('BGC score (GECCO weights)', 'BGC score (GECCO weights)',
             {"type": "cell", "criteria": ">", "value": 2, "format": gecco_format}),
            ('Viral score (V-Score)', 'Viral score (V-Score)',
             scale((white, "#E6B0AA"), (0, 4))),
            ('Hydrophobicity Mean', 'Hydrophobicity Mean',
             scale(("#5A8AC6", white, "#F8696B"), (-2.5, 0, 2.5))),
            ('Hydrophobicity Std Dev', 'Hydrophobicity Std Dev',
             scale((white, "#F8696B"), (0, 2))),
            ('Aliphatic Index Mean', 'Aliphatic Index Mean',
             scale((white, "#63BE7B"), (50, 150))),
            ('Aliphatic Index Std Dev', 'Aliphatic Index Std Dev',
             scale((white, "#F8696B"), (0, 30))),
            ('m/z Mean', 'm/z Mean',
             scale(("#dcedde", "#87bba2"), (5000, 75000))),
            ('m/z Std Dev', 'm/z Std Dev',
             scale(("#dcedde", "#87bba2"), (0, 5000))),
        ]

        for first_col, last_col, options in rules:
            if first_col in col_map and last_col in col_map:
                cell_range = f"{col_map[first_col]}2:{col_map[last_col]}{num_rows}"
                zr_sheet.conditional_format(cell_range, options)

        # Keep the header row visible while scrolling.
        zr_sheet.freeze_panes(1, 0)


def zolscape():
    """
    Run the zol-scape workflow end to end and exit the process.

    Steps: parse the command line, locate BGC GenBank files in the BiG-SCAPE
    input directory, read GCF memberships from the BiG-SCAPE clustering
    files, run zol once per GCF (or, in print mode, write the commands to
    'zol.cmds' and exit), then consolidate all available per-GCF reports into
    a TSV and a formatted XLSX in the output directory.

    If the zol results directory already contains anything, no zol commands
    are built or run and only the consolidation is performed.

    Always terminates via sys.exit: status 1 if either BiG-SCAPE directory is
    not a directory, status 0 otherwise (including user cancellation and
    print mode).
    """
    myargs = create_parser()

    bigscape_input_dir = os.path.abspath(myargs.big_scape_input) + '/'
    bigscape_results_dir = os.path.abspath(myargs.big_scape_results) + '/'
    outdir = os.path.abspath(myargs.outdir) + '/'
    zol_parameters = myargs.zol_parameters
    print_mode = myargs.print_mode
    threads = myargs.threads
    jobs = myargs.jobs

    # create output directory if needed, or warn of over-writing
    if os.path.isdir(outdir):
        sys.stderr.write("Output directory exists. Files will be overwritten, but\n"
                         "checkpoints will be used to avoid redoing successfully\n"
                         "completed steps.\n"
                         "Do you wish to proceed? (yes/no): ")
        user_input = input().strip().lower()
        if user_input not in ['yes', 'y']:
            sys.stderr.write("Execution cancelled by user.\n")
            sys.exit(0)
    else:
        util.setup_ready_directory([outdir], delete_if_exist=False)

    # Explicit check rather than `assert`, which is removed under `python -O`.
    if not (os.path.isdir(bigscape_input_dir) and os.path.isdir(bigscape_results_dir)):
        msg = 'Either the BiG-SCAPE input directory or results directory could not be validated as a directory - please check the paths provided!'
        sys.stderr.write(msg + '\n')
        sys.exit(1)

    # create logging object
    log_file = outdir + 'Progress.log'
    log_object = util.create_logger_object(log_file)
    version_string = util.get_version()

    sys.stdout.write(f'Running zol-scape version {version_string}\n')
    log_object.info(f'Running zol-scape version {version_string}')

    # log command used (appended, so re-runs accumulate in the same file)
    parameters_file = outdir + 'Command_Issued.txt'
    with open(parameters_file, 'a+') as parameters_handle:
        parameters_handle.write(' '.join(sys.argv) + '\n')

    # Step 1: Parse BGCs from input dir
    msg = '--------------------\nStep 1: Determine BGC GenBank paths in BiG-SCAPE input directory\n--------------------'
    log_object.info(msg)
    bgc_ids_to_paths = _find_bgc_genbanks(bigscape_input_dir)

    # Step 2: Parse GCFs from results dir
    msg = '--------------------\nStep 2: Parse GCF clustering info from BiG-SCAPE results directory\n--------------------'
    log_object.info(msg)
    cluster_gcf_bgcs = _parse_gcf_clusterings(bigscape_results_dir)

    # Step 3: Run zol or create zol task file
    zol_results_dir = outdir + 'zol_Results/'
    if not os.path.isdir(zol_results_dir):
        util.setup_ready_directory([zol_results_dir], delete_if_exist=True)

    if not os.listdir(zol_results_dir):
        zol_cmds = _build_zol_commands(cluster_gcf_bgcs, bgc_ids_to_paths,
                                       zol_results_dir, zol_parameters,
                                       threads, log_object)

        if print_mode:
            # Open for writing and emit every command, one per line.
            with open(outdir + 'zol.cmds', 'w') as outf:
                outf.write('\n'.join([' '.join(cmd) for cmd in zol_cmds]) + '\n')
            msg = f'Wrote zol commands for individual GCFs to {outdir + "zol.cmds"}'
            log_object.info(msg)
            sys.stderr.write(msg + '\n')
            sys.exit(0)
        elif zol_cmds:
            # Use robust error handling for zol analysis runs
            result_summary = util.robust_multiprocess_executor(
                worker_function=util.multi_process_safe,
                inputs=zol_cmds,
                pool_size=jobs,
                error_strategy="report_and_continue",
                log_object=log_object,
                description="zol gene cluster analysis"
            )

            total_processed = result_summary['total_processed']
            if total_processed:
                success_prop = result_summary['success_count'] / total_processed
                msg = f"{success_prop*100.0}% of zol runs were successful."
                sys.stdout.write(msg + '\n')
                log_object.info(msg)
            else:
                # Avoid dividing by zero when nothing was processed.
                msg = 'Warning: no zol runs were processed.'
                sys.stderr.write(msg + '\n')
                log_object.warning(msg)
        else:
            msg = 'Warning: no zol commands could be generated - no GCFs with available BGC GenBank files were found.'
            sys.stderr.write(msg + '\n')
            log_object.warning(msg)
    else:
        msg = 'Note, something was found in the zol result\'s directory from a previous run, will attempt to use available results.'
        sys.stderr.write(msg + '\n')
        log_object.info(msg)

    # Consolidate the per-GCF reports into a single table.
    consolidated_table_file = outdir + 'zol-scape_Results.tsv'
    zol_sheet_header = _consolidated_header(zol_parameters)
    num_rows = _write_consolidated_table(consolidated_table_file, zol_sheet_header,
                                         cluster_gcf_bgcs, zol_results_dir)

    zr_numeric_columns = {
        'Proportion of Total Gene Clusters with OG',
        'Proportion of Complete Gene Clusters with OG',
        'OG Median Length (bp)', 'OG Consensus Order',
        'Tajima\'s D', 'GARD Partitions Based on Recombination Breakpoints',
        'Number of Sites Identified as Under Positive or Negative Selection by FUBAR',
        'Average delta(Beta, Alpha) by FUBAR across sites',
        'Proportion of Sites Under Selection which are Positive',
        'Proportion of Filtered Codon Alignment is Segregating Sites',
        'Entropy', 'Upstream Region Entropy', 'Median Beta-RD-gc', 'Max Beta-RD-gc',
        'Proportion of sites which are highly ambiguous in codon alignment',
        'Proportion of sites which are highly ambiguous in trimmed codon alignment',
        'Median GC', 'Median GC Skew', 'BGC score (GECCO weights)', 'Viral score (V-Score)',
        'Hydrophobicity Mean', 'Hydrophobicity Std Dev', 'Aliphatic Index Mean',
        'Aliphatic Index Std Dev', 'm/z Mean', 'm/z Std Dev',
    }

    zr_data = util.load_table_in_panda_data_frame(consolidated_table_file,
                                                  zr_numeric_columns)

    # construct spreadsheet
    # NOTE(review): 'zol-scale' looks like a typo for 'zol-scape'; the name is
    # left unchanged because anything that reads this file may rely on it.
    zol_spreadsheet_file = outdir + 'zol-scale_Results.xlsx'
    _write_spreadsheet(zr_data, zol_spreadsheet_file, num_rows,
                       _selection_analysis_requested(zol_parameters))

    sys.stdout.write(f'Done running zol-scape!\nFinal spreadsheet can be found at: \n{zol_spreadsheet_file}\n')
    log_object.info(f'Done running zol-scape!\nFinal spreadsheet can be found at: \n{zol_spreadsheet_file}\n')

    # Close logging object and exit
    util.close_logger_object(log_object)
    sys.exit(0)

# Run the zol-scape workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    zolscape()
