#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from optparse import OptionParser
import sys
import os
from collections import defaultdict
import math
from bisect import bisect_left
# NumPy is a hard requirement; fail with a readable hint instead of a traceback.
try:
	import numpy as np
except ImportError as e:
	# Python 3 exceptions have no ``.message`` attribute; formatting the
	# exception itself yields the import error text.
	print('Error importing NumPy (%s). Please install NumPy.' % e, file=sys.stderr)
	sys.exit(1)
# matplotlib is a hard requirement; fail with a readable hint instead of a traceback.
try:
	import matplotlib
	# Select the PDF backend before pyplot is imported; this script only
	# ever writes PDF files.
	matplotlib.use('PDF')
	import matplotlib.pyplot as plt
	import matplotlib.mlab as mlab
except ImportError as e:
	# Python 3 exceptions have no ``.message`` attribute; formatting the
	# exception itself yields the import error text.
	print('Error importing matplotlib (%s). Please install matplotlib.' % e, file=sys.stderr)
	sys.exit(1)

__author__ = "Tobias Marschall"

# Usage text handed to OptionParser in main(); optparse expands %prog to the
# name of the running script.
usage = """%prog [options] <distribution-file>

Plots the internal segment length distribution as computed by insert-length-histogram."""

def _parse_pair(parser, option_name, text):
	"""Parse the comma-separated pair of floats given for a command-line option.

	parser      -- the OptionParser, used to report malformed input.
	option_name -- option spelling to mention in the error message.
	text        -- raw option value, expected to look like "a,b".

	Returns the two numbers as a tuple of floats. If text is not exactly two
	comma-separated numbers, parser.error() is called, which prints the usage
	and exits with status 2.
	"""
	try:
		first, second = (float(x) for x in text.split(','))
	except ValueError:
		# Raised both for non-numeric fields and for a wrong number of fields.
		parser.error('%s expects two comma-separated numbers, got "%s"' % (option_name, text))
	return first, second


def _read_distribution(filename):
	"""Read the histogram bins stored in filename.

	Every non-blank line must hold three whitespace-separated fields: the
	first and the last integer length covered by a bin (both inclusive) and
	the probability of that bin.

	Returns a list of (left_edge, width, density) tuples. The left edge is
	shifted by 0.5 so that a bin covering the integers left..right spans
	[left-0.5, right+0.5]; the density is the probability divided by the
	width, so the area of each bar equals the probability of its bin.
	"""
	bins = []
	with open(filename) as infile:
		for line in infile:
			fields = line.split()
			if not fields:
				# Tolerate blank lines (e.g. a trailing newline at end of file).
				continue
			left, right, prob = int(fields[0]), int(fields[1]), float(fields[2])
			width = right - left + 1
			bins.append((left - 0.5, width, prob / width))
	return bins


def _quantile_xrange(lefts, widths, values, tailmass):
	"""Choose an x-range that hides at most tailmass probability in each tail.

	lefts, widths and values are parallel, non-empty sequences holding the left
	edge, width and density (bar height) of each bin, ordered by position.

	Returns (xmin, xmax). Whole bins are cut off from either side as long as
	the probability mass removed on that side stays <= tailmass. If that would
	leave nothing to show, the full extent of the histogram is returned.
	"""
	full_min = lefts[0]
	full_max = lefts[-1] + widths[-1]
	# Left tail: drop leading bins while their accumulated mass fits.
	xmin = full_min
	hidden = 0.0
	for left, width, density in zip(lefts, widths, values):
		# The mass of a bin is its area (density * width), not its height.
		hidden += density * width
		if hidden > tailmass:
			break
		xmin = left + width
	# Right tail: drop trailing bins while their accumulated mass fits.
	xmax = full_max
	hidden = 0.0
	for left, width, density in zip(reversed(lefts), reversed(widths), reversed(values)):
		hidden += density * width
		if hidden > tailmass:
			break
		# Hiding this bin means the visible range ends at its left edge.
		xmax = left
	if xmin >= xmax:
		return full_min, full_max
	return xmin, xmax


def _normal_pdf(x, mean, stdev):
	"""Return the density of the normal distribution N(mean, stdev^2) at x.

	x may be a scalar or a NumPy array; stdev must be positive.
	"""
	z = (x - mean) / stdev
	return np.exp(-0.5 * z * z) / (stdev * np.sqrt(2.0 * np.pi))


def main():
	"""Command-line entry point: plot a segment length distribution to a PDF.

	Parses sys.argv, reads the distribution file given as the only positional
	argument, draws it as a bar chart (optionally overlaid with a normal
	density) and writes the figure to the output file.

	Exits with status 1 if the argument count is wrong or the file contains
	no bins, and with status 2 (through parser.error) if an option value is
	malformed.
	"""
	parser = OptionParser(usage=usage)
	parser.add_option("-o", action="store", dest="outputfile", default=None,
				help="Name of output file (default: <data-file>.pdf).")
	parser.add_option("-q", action="store", type=float, dest="tailmass", default=1e-4,
				help="Choose x-range such that at most the given mass is invisible in each tail (default=1e-4).")
	parser.add_option("-n", action="store", dest="normal", default=None,
				help="Additionally plot normal distribution with given mean and stddev (comma separated).")
	parser.add_option("--xrange", action="store", dest="x_range", default=None,
				help="Instead of using quantiles, use the given xrange (comma separated).")
	parser.add_option("--yrange", action="store", dest="y_range", default=None,
				help="Y-range to be used (default: auto).")
	(options, args) = parser.parse_args()
	if len(args) != 1:
		parser.print_help()
		sys.exit(1)
	# Validate all option values up front, before any file or figure is touched.
	# (An assert would be skipped when Python runs with -O.)
	if not 0 <= options.tailmass < 1:
		parser.error('-q expects a value in [0,1), got %s' % options.tailmass)
	x_limits = None
	if options.x_range is not None:
		x_limits = _parse_pair(parser, '--xrange', options.x_range)
	y_limits = None
	if options.y_range is not None:
		y_limits = _parse_pair(parser, '--yrange', options.y_range)
	normal_params = None
	if options.normal is not None:
		normal_params = _parse_pair(parser, '-n', options.normal)
		if normal_params[1] <= 0:
			parser.error('-n expects a positive stddev, got %s' % normal_params[1])

	inputfilename = args[0]
	table = _read_distribution(inputfilename)
	if not table:
		print('Error: no histogram bins found in "%s".' % inputfilename, file=sys.stderr)
		sys.exit(1)
	lefts, widths, values = zip(*table)

	fig = plt.figure()
	ax = fig.add_subplot(111)
	# lefts holds left bin edges, so the bars must be edge-aligned; newer
	# matplotlib versions centre bars on x by default.
	ax.bar(lefts, values, width=widths, align='edge', color='blue', edgecolor='blue')
	ax.set_xlabel("Internal segment size (excluding reads)")
	ax.set_ylabel("Relative frequency")
	ax.set_title(inputfilename)

	if x_limits is None:
		xmin, xmax = _quantile_xrange(lefts, widths, values, options.tailmass)
	else:
		xmin, xmax = x_limits
	ax.set_xlim(xmin, xmax)
	if y_limits is not None:
		ax.set_ylim(y_limits[0], y_limits[1])

	if normal_params is not None:
		mean, stdev = normal_params
		x = np.linspace(xmin, xmax, 500)
		# Computed with NumPy directly; mlab.normpdf is not available in
		# current matplotlib releases.
		ax.plot(x, _normal_pdf(x, mean, stdev), color='red')

	if options.outputfile is None:
		outputfilename = inputfilename + '.pdf'
	else:
		outputfilename = options.outputfile
	plt.savefig(outputfilename, format='pdf')
	plt.close()

# Script entry point: run the tool and hand main()'s result to sys.exit
# as the process exit status.
if __name__ == '__main__':
	exit_status = main()
	sys.exit(exit_status)
