#!/opt/conda/conda-bld/codoff_1762917807106/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh/bin/python

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import os
import sys
import argparse
# Add src to path to import codoff
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from codoff import codoff
import importlib.metadata
import traceback

# Look up the installed codoff release so it can be reported at start-up.
# When no installed distribution named "codoff" is found, the placeholder
# "NA" is reported instead.
package_name = "codoff"
try:
	version = str(importlib.metadata.version(package_name))
except importlib.metadata.PackageNotFoundError:
	version = "NA"

def create_parser(argv=None):
	"""
	Build the command-line interface and parse the arguments.

	Args:
		argv: Optional list of argument strings to parse. When None (the
			default) argparse reads the arguments from sys.argv[1:].

	Returns:
		argparse.Namespace with the attributes antismash_dir, output_dir,
		version, num_sims, seed and max_focal_cds_fraction.

	Raises:
		SystemExit: raised by argparse for -h/--help or when the arguments
			are invalid or a required argument is missing.
	"""
	parser = argparse.ArgumentParser(description="""
	Program: codoff
	Author: Rauf Salamzade
	Affiliation: Kalan Lab, UW Madison
		
	Simple wrapper of codoff for analysis of antiSMASH results for a 
	single genome. 
	""", formatter_class=argparse.RawTextHelpFormatter)

	# The value is used as a directory that is scanned for '.gbk' files,
	# so the help text describes a directory rather than a single file.
	parser.add_argument('-a', '--antismash-dir',
		help="Path to antiSMASH results directory for a single genome. Should\n"
			 "contain the full-genome GenBank file and the '.region' GenBank\n"
			 "files of the predicted BGCs.",
		required=True)
	parser.add_argument('-o', '--output-dir',
		help="Result directory.",
		required=True)
	# NOTE: main() looks for -v/--version in sys.argv before parsing, because
	# the required options above would otherwise make a bare '-v' fail.
	parser.add_argument('-v', '--version', action='store_true',
		help="Print version and exit.",
		required=False, default=False)
	parser.add_argument('-ns', '--num-sims', type=int,
		help="Number of simulations per BGC [Default: 10000].",
		required=False, default=10000)
	parser.add_argument('-x', '--seed', type=int,
		help="Random seed for reproducible results [Default: 42].",
		required=False, default=42)
	parser.add_argument('-m', '--max-focal-cds-fraction', type=float,
		help="Maximum allowed fraction of total genome CDS length for focal region [Default: 0.05].",
		required=False, default=0.05)

	return parser.parse_args(argv)

def _find_antismash_genbanks(antismash_dir):
	"""
	Locate the GenBank files inside an antiSMASH results directory.

	Files ending in '.gbk' whose name contains '.region' are treated as BGC
	region files; every other '.gbk' file is a full-genome candidate. The
	directory listing is sorted so the result does not depend on the order
	in which the filesystem happens to return entries.

	Args:
		antismash_dir: Path of the directory to scan (not recursive).

	Returns:
		Tuple (genome_gbks, bgc_gbks) of lists holding the full paths.
	"""
	genome_gbks = []
	bgc_gbks = []
	for f in sorted(os.listdir(antismash_dir)):
		if not f.endswith('.gbk'):
			continue
		path = os.path.join(antismash_dir, f)
		if '.region' in f:
			bgc_gbks.append(path)
		else:
			genome_gbks.append(path)
	return genome_gbks, bgc_gbks

def main():
	"""
	Void function which runs primary workflow for program.

	Workflow: report the version (and stop if -v/--version was given), parse
	the command line, validate the input and output directories, locate the
	full-genome and BGC region GenBank files, extract the genome-wide codon
	data once, then run codoff for each BGC region, writing '<region>.txt'
	and '<region>.svg' into the output directory.

	Exits with status 1 on any validation or processing failure.
	"""

	sys.stderr.write('Running version ' + str(version) + ' of codoff!\n')
	# Handled before argument parsing: the parser has required options, so
	# a bare '-v' would otherwise be rejected.
	cli_args = sys.argv[1:]
	if '-v' in cli_args or '--version' in cli_args:
		sys.exit(0)

	# PARSE INPUTS
	myargs = create_parser()

	antismash_dir = os.path.abspath(myargs.antismash_dir) + '/'
	output_dir = os.path.abspath(myargs.output_dir) + '/'
	num_sims = myargs.num_sims
	seed = myargs.seed
	max_focal_cds_fraction = myargs.max_focal_cds_fraction

	# Explicit check instead of assert: assert statements are removed when
	# Python runs with -O, which would silently skip the validation.
	if not os.path.isdir(antismash_dir):
		sys.stderr.write('Error: Unable to validate antiSMASH directory exists.\n')
		sys.exit(1)

	# Refuse to write into an existing directory so earlier results are
	# never overwritten.
	if os.path.isdir(output_dir):
		sys.stderr.write('Error: Output directory already exists. Please provide a different output directory.\n')
		sys.exit(1)
	os.makedirs(output_dir)

	# START WORKFLOW

	# Process antiSMASH results directory
	genome_gbks, bgc_gbks = _find_antismash_genbanks(antismash_dir)

	if not genome_gbks:
		sys.stderr.write('Unable to find full genome GenBank file in antiSMASH results directory.\n')
		sys.exit(1)

	genome_gbk = genome_gbks[0]
	if len(genome_gbks) > 1:
		# Previously one of the candidates was picked silently, depending on
		# directory listing order; make the choice deterministic and visible.
		sys.stderr.write('Warning: Multiple full genome GenBank files found in antiSMASH results directory; using %s.\n' % genome_gbk)

	if not bgc_gbks:
		sys.stderr.write('Unable to find any BGC GenBank files in antiSMASH results directory.\n')
		sys.exit(1)

	msg = "Running codoff on %d BGC regions in %s" % (len(bgc_gbks), antismash_dir)
	sys.stdout.write(msg + '\n')

	# Extract genome codon data once to avoid redundant computation
	sys.stdout.write('Extracting genome-wide codon usage data...\n')
	try:
		genome_data = codoff.extract_genome_codon_data(full_genome_file=genome_gbk, verbose=False)
		sys.stdout.write('Successfully extracted genome data for %d genes.\n' % len(genome_data['gene_list']))
	except (Exception, SystemExit):
		# SystemExit is caught too so that a sys.exit() raised inside the
		# library call (NOTE(review): library internals not visible here) is
		# still reported with context. KeyboardInterrupt is left to propagate.
		sys.stderr.write('Unable to extract genome codon usage data.\n')
		sys.stderr.write(traceback.format_exc() + '\n')
		sys.exit(1)

	# Process each BGC region using cached genome data
	for bgc_gbk in bgc_gbks:
		# Strip only the trailing '.gbk' so names that contain '.gbk'
		# elsewhere are not truncated (which could make outputs collide).
		bgc_gbk_name = os.path.splitext(os.path.basename(bgc_gbk))[0]
		bgc_outfile = os.path.join(output_dir, bgc_gbk_name + '.txt')
		bgc_plotfile = os.path.join(output_dir, bgc_gbk_name + '.svg')

		try:
			codoff.codoff_main_gbk(
				full_genome_file=genome_gbk,
				focal_genbank_files=[bgc_gbk],
				outfile=bgc_outfile,
				plot_outfile=bgc_plotfile,
				verbose=False,
				num_sims=num_sims,
				genome_data=genome_data,
				seed=seed,
				max_focal_cds_fraction=max_focal_cds_fraction
			)
		except (Exception, SystemExit):
			# Same rationale as above: keep naming the failing BGC even if the
			# library signals failure via sys.exit(); do not trap Ctrl-C.
			sys.stderr.write('Unable to process codoff inputs for %s.\n' % bgc_gbk_name)
			sys.stderr.write(traceback.format_exc() + '\n')
			sys.exit(1)
		else:
			msg = 'Successfully ran codoff for %s.' % bgc_gbk_name
			sys.stdout.write(msg + '\n')
		
# Run the workflow only when this file is executed as a script, so that
# importing it does not trigger argument parsing or any analysis.
if __name__ == '__main__':
	main()
