#!/opt/conda/conda-bld/codoff_1762917807106/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh/bin/python

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import os
import sys
import argparse
# Add src to path to import codoff
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from codoff import codoff
import importlib.metadata

# Look up the installed distribution version of codoff. When no distribution
# metadata is found for the package, the placeholder "NA" is reported instead.
package_name = "codoff"
try:
	version = str(importlib.metadata.version(package_name))
except importlib.metadata.PackageNotFoundError:
	version = "NA"

def create_parser(argv=None):
	"""
	Define the codoff command-line interface and parse the arguments.

	Despite its name, this function returns the parsed arguments, not the
	parser object.

	Parameters:
		argv: Optional list of argument strings to parse. When None (the
			default) argparse reads the arguments from sys.argv[1:].

	Returns:
		argparse.Namespace with the attributes full_genome, scaffold,
		start_coord, end_coord, focal_genbanks, outfile, plot_outfile,
		num_sims, version, seed and max_focal_cds_fraction.

	Note: argparse terminates the program (SystemExit) on invalid input,
	when the required -g/--full-genome option is missing, or when -h/--help
	is requested.
	"""
	parser = argparse.ArgumentParser(description="""
	Program: codoff
	Author: Rauf Salamzade
	Affiliation: Kalan Lab, UW Madison

	This program compares the codon-usage distribution of a focal-region/BGC 
	to the codon usage of the background genome. It will report the cosine 
	distance and Spearman correlation between the two profiles, as well as
	a discordance percentile indicating how unusual the focal region's codon 
	usage is compared to similarly sized genomic windows. Only CDS features 
	which are of length divisible by 3 will be considered. 
									 
	Two modes of input are supported:
									 
	1. (WORKS FOR BOTH EUKARYOTES & BACTERIA) Focal region and full-genome 
	   provided as GenBank files with CDS features (compatible with 
	   antiSMASH outputs). Multiple focal region GenBank files can be provided, 
	   e.g. consider a biosynthetic gene cluster split across multiple 
	   scaffolds due to assembly fragmentation. 
																		 
	   Example command: 
									 
	   $ codoff -f Sw_LK413/NZ_JALXLO020000001.1.region001.gbk -g Sw_LK413/LK413.gbk
	   $ codoff -f region.gbk -g genome.gbk --num-sims 5000
									 
	2. (WORKS ONLY FOR BACTERIA) Full genome is provided as a FASTA or GenBank 
	   file. If CDS features are missing gene calling is performed using 
	   pyrodigal. Afterwards, the focal region is determined through user
	   specified coordinates.
	
	   Example command:
									 
	   $ codoff -s NZ_JALXLO020000001.1 -a 341425 -b 388343 -g Sw_LK413/LK413.fna
	   $ codoff -s scaffold -a 1000 -b 5000 -g genome.fna --num-sims 20000 
 
	""", formatter_class=argparse.RawTextHelpFormatter)

	# Background genome: the only option that is always required.
	parser.add_argument('-g', '--full-genome', 
		help="Path to a full-genome in GenBank or FASTA format. If GenBank file\n"
			 "provided, CDS features are required.", 
		required=True, default=None)

	# Input mode 2: focal region given as coordinates on a scaffold.
	parser.add_argument('-s', '--scaffold', 
		help="Scaffold identifier for focal region.", 
		required=False, default=None)
	parser.add_argument('-a', '--start-coord', type=int, 
		help="Start coordinate for focal region.", 
		required=False, default=None)
	parser.add_argument('-b', '--end-coord', type=int, 
		help="End coordinate for focal region.", 
		required=False, default=None)

	# Input mode 1: focal region given as one or more GenBank files.
	parser.add_argument('-f', '--focal-genbanks', nargs='+', 
		help="Path to focal region GenBank(s) for isolate. Locus tags must match\n"
			 "with tags in full-genome GenBank.", 
		required=False, default=None)

	# Output locations.
	parser.add_argument('-o', '--outfile', 
		help="Path to output file [Default is standard output].", 
		required=False, default="stdout")
	parser.add_argument('-p', '--plot-outfile', 
		help="Plot output file name (will be in SVG format). If not provided, no\n"
			 "plot will be made.", 
		required=False, default=None)

	# Analysis settings and miscellaneous flags.
	parser.add_argument('-ns', '--num-sims', type=int,
		help="Number of simulations to run [Default: 10000].",
		required=False, default=10000)
	parser.add_argument('-v', '--version', action='store_true', 
		help="Print version and exit", 
		required=False, default=False)
	parser.add_argument('-x', '--seed', type=int,
		help="Random seed for reproducible results [Default: 42].",
		required=False, default=42)
	parser.add_argument('-m', '--max-focal-cds-fraction', type=float,
		help="Maximum allowed fraction of total genome CDS length for focal region [Default: 0.05].",
		required=False, default=0.05)

	# argv=None keeps the historical behaviour of parsing sys.argv[1:].
	args = parser.parse_args(argv)
	return args

def main():
	"""
	Run the primary codoff command-line workflow.

	Writes the running version to standard error, exits with status 0 when
	-v/--version is present on the command line, and otherwise parses the
	arguments and dispatches to one of two analysis entry points:

	- codoff.codoff_main_gbk when focal region GenBank file(s) were given (-f).
	- codoff.codoff_main_coords when the scaffold, start coordinate and end
	  coordinate were all given (-s, -a, -b).

	If both kinds of input are supplied, the GenBank mode takes precedence.
	Exits with status 1 when neither kind of input is complete.
	"""

	sys.stderr.write('Running version ' + str(version) + ' of codoff!\n')
	# The version flag is handled before argparse runs: --full-genome is a
	# required option, so parsing "-v" on its own would end in a usage error.
	cli_args = sys.argv[1:]
	if '-v' in cli_args or '--version' in cli_args:
		sys.exit(0)

	# PARSE INPUTS
	myargs = create_parser()

	focal_scaffold = myargs.scaffold
	focal_start_coord = myargs.start_coord
	focal_end_coord = myargs.end_coord
	focal_genbank_files = myargs.focal_genbanks

	# Keyword arguments shared by both analysis modes.
	shared_kwargs = {
		'full_genome_file': os.path.abspath(myargs.full_genome),
		'outfile': myargs.outfile,
		'plot_outfile': myargs.plot_outfile,
		'verbose': True,
		'num_sims': myargs.num_sims,
		'seed': myargs.seed,
		'max_focal_cds_fraction': myargs.max_focal_cds_fraction,
	}

	# START WORKFLOW
	coords_complete = all(
		value is not None
		for value in (focal_scaffold, focal_start_coord, focal_end_coord)
	)

	if focal_genbank_files is not None:
		codoff.codoff_main_gbk(
			focal_genbank_files=focal_genbank_files,
			**shared_kwargs
		)
	elif coords_complete:
		codoff.codoff_main_coords(
			focal_scaffold=focal_scaffold,
			focal_start_coord=focal_start_coord,
			focal_end_coord=focal_end_coord,
			**shared_kwargs
		)
	else:
		sys.stderr.write('Error: Insufficient input provided!\n')
		sys.exit(1)
	
# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
	main()
