// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
// SPDX-License-Identifier: BSD-3-Clause
//
// --------------------------------------------------------------------------
// $Maintainer: Chris Bielow $
// $Authors: Clemens Groepl, Andreas Bertsch, Chris Bielow $
// --------------------------------------------------------------------------

#pragma once

#include <OpenMS/CHEMISTRY/ISOTOPEDISTRIBUTION/IsotopePatternGenerator.h>
#include <OpenMS/CHEMISTRY/ISOTOPEDISTRIBUTION/IsotopeDistribution.h>

#include <set>

namespace OpenMS
{
  /**
    * @ingroup Chemistry
    * @brief Isotope pattern generator for coarse isotope distributions.
    *
    * This algorithm implements IsotopePatternGenerator and generates
    * theoretical pattern distributions for empirical formulas with resolution
    * of 1Da. It convolves the empirical abundances of each element in a
    * molecular formula, thus producing accurate intensities (probabilities)
    * for each isotopic peak. However, it will assume that every isotope has an
    * atomic mass that is rounded to the closest integer in Daltons, therefore
    * it produces coarse distributions (it does not discriminate between 13C,
    * 15N and 18O peaks).  For example, for a molecule that contains both
    * Carbon and Nitrogen, it will add up the probabilities for 13C and 15N,
    * ignoring the fact that their masses are (slightly) different. Therefore,
    * the probability distributions generated by the
    * CoarseIsotopePatternGenerator are accurate, but the masses (m/z) are only
    * approximately accurate.  In case you need fine resolution, please
    * consider using the FineIsotopePatternGenerator.
    *
    * The output is a list of pairs containing nominal isotope probabilities
    * paired with a number that is either an approximately accurate or rounded
    * (integer) mass. The accurate masses assume the nominal isotopes are
    * mostly due to (13)Carbon.  To return accurate vs rounded masses, use
    * setRoundMasses accordingly.  The default is to return accurate masses
    * (note that setting this option will not influence the probabilities and
    * will still produce a coarse distribution spaced at ca. 1 Da). For example,
    * using rounded mass, for a C100 molecule, you will get:
    *
    * @code
    *     1200 : 0.341036528
    *     1201 : 0.368855864
    *     1202 : 0.197477505
    *     1203 : 0.0697715357
    * @endcode
    *
    * while accurate mass will produce:
    *
    * @code
    *     1200       : 0.341036528
    *     1201.00335 : 0.368855864
    *     1202.00671 : 0.197477505
    *     1203.01006 : 0.0697715357
    * @endcode
    *
    * The other important value which needs to be set is the max isotope value.
    * This value can be set using the setMaxIsotope method. It is an upper
    * bound for the number of isotopes which are calculated. If, e.g., set to 3,
    * only the first three isotopes (monoisotopic mass, +1 and +2) are
    * calculated.
    *
    * @note By default all possible isotopes are calculated, which leads to a large
    * number of values, if the mass value is large!
    *
    * @note If you need fine isotope distributions, consider using the
    * FineIsotopePatternGenerator.
    *
    * See also method run()
    **/

  class OPENMS_DLLAPI CoarseIsotopePatternGenerator
    : public IsotopePatternGenerator
  {

 public:
    /**
       @brief Creates a generator with the given isotope limit and mass rounding mode

       @param max_isotope Maximal isotope, see setMaxIsotope()
              (NOTE(review): the default of 0 presumably means "no limit" -- confirm in the implementation)
       @param round_masses Round masses to integer values (true) or return accurate masses (false), see setRoundMasses()
    */
    CoarseIsotopePatternGenerator(const Size max_isotope = 0, const bool round_masses = false);

    /// Destructor (overrides the destructor of IsotopePatternGenerator)
    ~CoarseIsotopePatternGenerator() override;

    /// @name Accessors
    ///@{
    /** @brief sets the maximal isotope with @p max_isotope

            Sets the maximal isotope which is included in the distribution
            and used to limit the calculations. This is useful as distributions
            with numerous isotopes tend to have a lot of numerical zeros at the end.
    */
    void setMaxIsotope(const Size& max_isotope);

    /// round masses to integer values (true) or return accurate masses (false)
    void setRoundMasses(const bool round_masses);

    /// returns the currently set maximum isotope
    Size getMaxIsotope() const;

    /// returns the current value of the flag to round masses to integer values (true) or return accurate masses (false), see setRoundMasses()
    bool getRoundMasses() const;
    ///@}

    /**
      * @brief Creates an isotope distribution from an empirical sum formula
      *
      * Iterates through all elements, convolves them according to the number
      * of atoms from that element and sums up the result.
      *
      * The (unnamed) parameter is the empirical formula to compute the
      * distribution for.
      *
      * @return The isotope distribution of the given formula
      **/
    IsotopeDistribution run(const EmpiricalFormula&) const override;

    /**
       @brief Estimate peptide IsotopeDistribution from average weight

       Implementation using the averagine model proposed by Senko et al. in
       "Determination of Monoisotopic Masses and Ion Populations for Large Biomolecules from Resolved Isotopic Distributions"

       @note NOTE(review): the number of isotopes to report is not a parameter of this function;
             presumably it is taken from the maximal isotope set via setMaxIsotope() -- confirm in the implementation.

       @param average_weight Average weight to estimate the IsotopeDistribution for
    */
    IsotopeDistribution estimateFromPeptideWeight(double average_weight);

    /**
       @brief Estimate peptide IsotopeDistribution from monoisotopic weight

       Implementation using the averagine model proposed by Senko et al. in
       "Determination of Monoisotopic Masses and Ion Populations for Large Biomolecules from Resolved Isotopic Distributions"
       But this function takes monoisotopic mass. Thus determination of monoisotopic mass is not performed.

       @note NOTE(review): the number of isotopes to report is not a parameter of this function;
             presumably it is taken from the maximal isotope set via setMaxIsotope() -- confirm in the implementation.

       @param mono_weight Monoisotopic weight to estimate the IsotopeDistribution for
    */
      IsotopeDistribution estimateFromPeptideMonoWeight(double mono_weight);


    /**
       @brief Estimate peptide IsotopeDistribution from average weight and exact number of sulfurs

       @param average_weight Average weight to estimate an EmpiricalFormula for
       @param S The exact number of Sulfurs in this molecule

       @pre S <= average_weight / average_weight(sulfur)
       @pre average_weight >= 0
    */
    IsotopeDistribution estimateFromPeptideWeightAndS(double average_weight, UInt S);

    /**
       @brief roughly approximate peptide IsotopeDistribution from monoisotopic weight using Poisson distribution.

       m/z values are approximated by adding one neutron mass (divided by charge) for every peak, starting at the given monoisotopic weight.
       Foundation from: Bellew et al, https://dx.doi.org/10.1093/bioinformatics/btl276

       This method is around 50 times faster than estimateFromPeptideWeight, but only an approximation. The following are the intensities
       of the first 6 peaks generated for a monoisotopic mass of 1000:

       @code
       estimateFromPeptideWeight:    0.571133000;0.306181000;0.095811100;0.022036900;0.004092170;0.000644568
       approximateFromPeptideWeight: 0.573753000;0.318752000;0.088542200;0.016396700;0.002277320;0.000253036
       @endcode

       KL divergences of the first 20 intensities of estimateFromPeptideWeight and this approximation range from 4.97E-5 for a
       monoisotopic mass of 20 to 0.0144 for a mass of 2500. For comparison, when comparing an observed pattern with a
       theoretical ground truth, the observed pattern is said to be an isotopic pattern if the KL between the two is below 0.05
       for 2 peaks and below 0.6 for >=6 peaks by Guo Ci Teo et al.

       @param mass m/z of monoisotopic peak (with charge = 1) to approximate the distribution of intensities for
       @param num_peaks How many peaks should be generated (independent of this->max_isotope)
       @param charge Charge of the resulting distribution
    */
    static IsotopeDistribution approximateFromPeptideWeight(double mass, UInt num_peaks = 20, UInt charge = 1);

    /**
       @brief roughly approximate intensity distribution of peptidic isotope patterns from monoisotopic weight using Poisson distribution.

       Foundation from: Bellew et al, https://dx.doi.org/10.1093/bioinformatics/btl276

       This method is around 100 times faster than estimateFromPeptideWeight, but only an approximation of the intensities.
       It does not return IsotopeDistribution but a vector of intensities. For an assessment of accuracy, see approximateFromPeptideWeight.

       @param mass m/z of monoisotopic peak (with charge = 1) to approximate the distribution of intensities for
       @param num_peaks How many peaks should be generated (independent of this->max_isotope)

       @return The approximated intensities as a vector
    */
    static std::vector<double> approximateIntensities(double mass, UInt num_peaks = 20);

    /**
       @brief Estimate nucleotide (RNA) IsotopeDistribution from average weight

       averagine model from Zubarev, R. A.; Demirev, P. A. in
       "Isotope  depletion  of  large biomolecules: Implications for molecular mass measurements."

       @note NOTE(review): the number of isotopes to report is not a parameter of this function;
             presumably it is taken from the maximal isotope set via setMaxIsotope() -- confirm in the implementation.

       @param average_weight Average weight to estimate the IsotopeDistribution for
    */
    IsotopeDistribution estimateFromRNAWeight(double average_weight);

    /**
       @brief Estimate nucleotide (RNA) IsotopeDistribution from monoisotopic weight

       averagine model from Zubarev, R. A.; Demirev, P. A. in
       "Isotope  depletion  of  large biomolecules: Implications for molecular mass measurements."

       @note NOTE(review): the number of isotopes to report is not a parameter of this function;
             presumably it is taken from the maximal isotope set via setMaxIsotope() -- confirm in the implementation.

       @param mono_weight Monoisotopic weight to estimate the IsotopeDistribution for
    */
   IsotopeDistribution estimateFromRNAMonoWeight(double mono_weight);


    /**
       @brief Estimate nucleotide (DNA) IsotopeDistribution from average weight

       averagine model from Zubarev, R. A.; Demirev, P. A. in
       "Isotope  depletion  of  large biomolecules: Implications for molecular mass measurements."

       @note NOTE(review): the number of isotopes to report is not a parameter of this function;
             presumably it is taken from the maximal isotope set via setMaxIsotope() -- confirm in the implementation.

       @param average_weight Average weight to estimate the IsotopeDistribution for
    */
    IsotopeDistribution estimateFromDNAWeight(double average_weight);

    /**
       @brief Estimate IsotopeDistribution from average weight and average composition

       @note NOTE(review): the number of isotopes to report is not a parameter of this function;
             presumably it is taken from the maximal isotope set via setMaxIsotope() -- confirm in the implementation.

       @param average_weight Average weight to estimate the IsotopeDistribution for
       @param C,H,N,O,S,P Average composition, one value per element
              (NOTE(review): presumably relative stoichiometries as documented for estimateForFragmentFromWeightAndComp() -- confirm)
    */
    IsotopeDistribution estimateFromWeightAndComp(double average_weight, double C, double H, double N, double O, double S, double P);

    /**
       @brief Estimate IsotopeDistribution from monoisotopic weight and average composition

       @note NOTE(review): the number of isotopes to report is not a parameter of this function;
             presumably it is taken from the maximal isotope set via setMaxIsotope() -- confirm in the implementation.

       @param mono_weight Monoisotopic weight to estimate the IsotopeDistribution for
       @param C,H,N,O,S,P Average composition, one value per element
              (NOTE(review): presumably relative stoichiometries as documented for estimateForFragmentFromWeightAndComp() -- confirm)
    */
    IsotopeDistribution estimateFromMonoWeightAndComp(double mono_weight, double C, double H, double N, double O, double S, double P);


    /**
       @brief Estimate IsotopeDistribution from weight, exact number of sulfurs, and average remaining composition

       @param average_weight Average weight to estimate an IsotopeDistribution for
       @param S The exact numbers of Sulfurs in this molecule
       @param C The approximate relative stoichiometry of Carbons to other elements (excluding Sulfur) in this molecule
       @param H The approximate relative stoichiometry of Hydrogens to other elements (excluding Sulfur) in this molecule
       @param N The approximate relative stoichiometry of Nitrogens to other elements (excluding Sulfur) in this molecule
       @param O The approximate relative stoichiometry of Oxygens to other elements (excluding Sulfur) in this molecule
       @param P The approximate relative stoichiometry of Phosphoruses to other elements (excluding Sulfur) in this molecule

       @pre S, C, H, N, O, P >= 0
       @pre average_weight >= 0
    */
    IsotopeDistribution estimateFromWeightAndCompAndS(double average_weight, UInt S, double C, double H, double N, double O, double P);

    /**
       @brief Estimate peptide fragment IsotopeDistribution from the precursor's average weight,
       fragment's average weight, and a list of isolated precursor isotopes.

       The max_depth of the isotopic distribution is set to max(precursor_isotopes)+1.

       @param average_weight_precursor average weight of the precursor peptide
       @param average_weight_fragment average weight of the fragment
       @param precursor_isotopes the precursor isotopes that were isolated. 0 corresponds to the mono-isotopic molecule (M0), 1->M1, etc.

       @pre average_weight_precursor >= average_weight_fragment
       @pre average_weight_fragment > 0
       @pre average_weight_precursor > 0
       @pre precursor_isotopes.size() > 0
    */
    IsotopeDistribution estimateForFragmentFromPeptideWeight(double average_weight_precursor, double average_weight_fragment, const std::set<UInt>& precursor_isotopes);

    /**
       @brief Estimate peptide fragment IsotopeDistribution from the precursor's average weight,
       number of sulfurs in the precursor, fragment's average weight, number of sulfurs in the fragment,
       and a list of isolated precursor isotopes.

       The max_depth of the isotopic distribution is set to max(precursor_isotopes)+1.

       @param average_weight_precursor average weight of the precursor peptide
       @param S_precursor The exact number of Sulfurs in the precursor peptide
       @param average_weight_fragment average weight of the fragment
       @param S_fragment The exact number of Sulfurs in the fragment
       @param precursor_isotopes the precursor isotopes that were isolated

       @pre S_fragment <= average_weight_fragment / average_weight(sulfur)
       @pre S_precursor - S_fragment <= (average_weight_precursor - average_weight_fragment) / average_weight(sulfur)
       @pre average_weight_precursor >= average_weight_fragment
       @pre average_weight_precursor > 0
       @pre average_weight_fragment > 0
       @pre precursor_isotopes.size() > 0
    */
    IsotopeDistribution estimateForFragmentFromPeptideWeightAndS(double average_weight_precursor, UInt S_precursor, double average_weight_fragment, UInt S_fragment, const std::set<UInt>& precursor_isotopes) const;

    /**
       @brief Estimate RNA fragment IsotopeDistribution from the precursor's average weight,
       fragment's average weight, and a list of isolated precursor isotopes.

       The max_depth of the isotopic distribution is set to max(precursor_isotopes)+1.

       @param average_weight_precursor average weight of the precursor nucleotide
       @param average_weight_fragment average weight of the fragment
       @param precursor_isotopes the precursor isotopes that were isolated. 0 corresponds to the mono-isotopic molecule (M0), 1->M1, etc.

       @pre average_weight_precursor >= average_weight_fragment
       @pre average_weight_precursor > 0
       @pre average_weight_fragment > 0
       @pre precursor_isotopes.size() > 0
    */
    IsotopeDistribution estimateForFragmentFromRNAWeight(double average_weight_precursor, double average_weight_fragment, const std::set<UInt>& precursor_isotopes);

    /**
       @brief Estimate DNA fragment IsotopeDistribution from the precursor's average weight,
       fragment's average weight, and a list of isolated precursor isotopes.

       The max_depth of the isotopic distribution is set to max(precursor_isotopes)+1.

       @param average_weight_precursor average weight of the precursor nucleotide
       @param average_weight_fragment average weight of the fragment
       @param precursor_isotopes the precursor isotopes that were isolated. 0 corresponds to the mono-isotopic molecule (M0), 1->M1, etc.

       @pre average_weight_precursor >= average_weight_fragment
       @pre average_weight_precursor > 0
       @pre average_weight_fragment > 0
       @pre precursor_isotopes.size() > 0
    */
    IsotopeDistribution estimateForFragmentFromDNAWeight(double average_weight_precursor, double average_weight_fragment, const std::set<UInt>& precursor_isotopes);

    /**
       @brief Estimate fragment IsotopeDistribution from the precursor's average weight,
       fragment's average weight, a list of isolated precursor isotopes, and average composition

       The max_depth of the isotopic distribution is set to max(precursor_isotopes)+1.

       @param average_weight_precursor average weight of the precursor molecule
       @param average_weight_fragment average weight of the fragment molecule
       @param precursor_isotopes the precursor isotopes that were isolated. 0 corresponds to the mono-isotopic molecule (M0), 1->M1, etc.
       @param C The approximate relative stoichiometry of Carbons to other elements in this molecule
       @param H The approximate relative stoichiometry of Hydrogens to other elements in this molecule
       @param N The approximate relative stoichiometry of Nitrogens to other elements in this molecule
       @param O The approximate relative stoichiometry of Oxygens to other elements in this molecule
       @param S The approximate relative stoichiometry of Sulfurs to other elements in this molecule
       @param P The approximate relative stoichiometry of Phosphoruses to other elements in this molecule

       @pre S, C, H, N, O, P >= 0
       @pre average_weight_precursor >= average_weight_fragment
       @pre average_weight_precursor > 0
       @pre average_weight_fragment > 0
       @pre precursor_isotopes.size() > 0
    */
    IsotopeDistribution estimateForFragmentFromWeightAndComp(double average_weight_precursor, double average_weight_fragment, const std::set<UInt>& precursor_isotopes, double C, double H, double N, double O, double S, double P) const;

    /**
       @brief Calculate isotopic distribution for a fragment molecule

       This calculates the isotopic distribution for a fragment molecule given
       the isotopic distribution of the fragment and complementary fragment (as
       if they were precursors), and which precursor isotopes were isolated.

       @note Do consider normalising the distribution afterwards to get conditional probabilities.

       Equations come from Rockwood, AL; Kushnir, MA; Nelson, GJ. in
       "Dissociation of Individual Isotopic Peaks: Predicting Isotopic Distributions of Product Ions in MSn"

       @param fragment_isotope_dist the isotopic distribution of the fragment (as if it was a precursor).
       @param comp_fragment_isotope_dist the isotopic distribution of the complementary fragment (as if it was a precursor).
       @param precursor_isotopes a list of which precursor isotopes were isolated. 0 corresponds to the mono-isotopic molecule (M0), 1->M1, etc.
       @param fragment_mono_mass the monoisotopic mass of the fragment.

       @pre fragment_isotope_dist and comp_fragment_isotope_dist are gapless (no missing isotopes between the min/max isotopes of the dist)
    */
    IsotopeDistribution calcFragmentIsotopeDist(const IsotopeDistribution& fragment_isotope_dist, const IsotopeDistribution& comp_fragment_isotope_dist, const std::set<UInt>& precursor_isotopes, const double fragment_mono_mass) const;

    /// Assignment operator
    CoarseIsotopePatternGenerator& operator=(const CoarseIsotopePatternGenerator& iso);

    /// convolves the distributions @p left and @p right and returns the result
    IsotopeDistribution::ContainerType convolve(const IsotopeDistribution::ContainerType& left, const IsotopeDistribution::ContainerType& right) const;

  protected:

    /// convolves the distribution @p input @p factor times and returns the result
    IsotopeDistribution::ContainerType convolvePow_(const IsotopeDistribution::ContainerType& input, Size factor) const;

    /// convolves the distribution @p input with itself and returns the result
    IsotopeDistribution::ContainerType convolveSquare_(const IsotopeDistribution::ContainerType& input) const;

    /// converts the masses of distribution @p input from atomic numbers to accurate masses and returns the result
    /// (NOTE(review): @p mono_weight is presumably the monoisotopic weight the accurate masses are anchored at -- confirm in the implementation)
    IsotopeDistribution::ContainerType correctMass_(const IsotopeDistribution::ContainerType& input, const double mono_weight) const;

    /** @brief calculates the fragment distribution for a fragment molecule and returns it.

        @param fragment_isotope_dist the isotopic distribution of the fragment (as if it was a precursor).
        @param comp_fragment_isotope_dist the isotopic distribution of the complementary fragment (as if it was a precursor).
        @param precursor_isotopes which precursor isotopes were isolated. 0 corresponds to the mono-isotopic molecule (M0), 1->M1, etc.
    */
    IsotopeDistribution calcFragmentIsotopeDist_(const IsotopeDistribution::ContainerType& fragment_isotope_dist, const IsotopeDistribution::ContainerType& comp_fragment_isotope_dist, const std::set<UInt>& precursor_isotopes) const;

    /// fills a gapped isotope pattern @p id (i.e. certain masses are missing) with zero probability masses and returns the result
    IsotopeDistribution::ContainerType fillGaps_(const IsotopeDistribution::ContainerType& id) const;

    /// maximal isotope which is used to calculate the distribution, see setMaxIsotope()
    Size max_isotope_;
    /// flag to determine whether masses should be rounded or not, see setRoundMasses()
    bool round_masses_;

  };

} // namespace OpenMS
