/**
 * \file pappsomspp/processing/specpeptidoms/spomsspectrum.h
 * \date 24/03/2025
 * \author Aurélien Berthier
 * \brief SpecPeptidOMS Spectrum
 *
 * C++ implementation of the SpecPeptidOMS algorithm described in:
 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
 * https://doi.org/10.1021/acs.jproteome.4c00870.
 */

/*
 * Copyright (c) 2025 Aurélien Berthier
 * <aurelien.berthier@ls2n.fr>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <cstdint>
#include "pappsomspp/core/massspectrum/qualifiedmassspectrum.h"
#include "pappsomspp/core/processing/specglob/experimentalspectrum.h"
#include "pappsomspp/core/amino_acid/aacode.h"

namespace pappso
{
namespace specpeptidoms
{

/**
 * @brief Position of an amino acid found between two peaks of the spectrum.
 *
 * Plain aggregate. The order of the members is part of the interface (brace initialization), so
 * it must not be changed. Every member carries a default initializer so that a default-constructed
 * AaPosition never holds indeterminate values.
 */
struct AaPosition
{
  /// Index of the amino acid's right support peak.
  std::size_t r_peak = 0;

  /// Index of the amino acid's left support peak. If the left peak is not supported
  /// (l_support == false), then l_peak == next_l_peak.
  std::size_t l_peak = 0;

  /// Index of the first supported peak to the left of the amino acid's left support peak.
  std::size_t next_l_peak = 0;

  /**
   * 32-bit integer used to evaluate the program's "threePeaks" condition.
   *  - The first bit is set to 1 if this position's left peak has no other amino acid directly on
   *    its left (i.e. l_support is false) and is not the spectrum's first peak.
   *  - The second bit is set to 1 if this position's left peak is the spectrum's first peak.
   *  - The next 22 bits are a one-hot encoding of the 22 amino acids: when an amino acid is found
   *    to the left of this position, its corresponding bit is set to 1.
   *
   * To check whether the threePeaks condition holds, a bitwise AND is performed between this
   * value and another 32-bit integer whose first 2 bits are set to 1, together with the bit
   * corresponding to the preceding amino acid in the protein sequence.
   *
   * NOTE(review): the exact bit numbering ("first", "second") is defined by the code that
   * computes this value, which is not visible in this header.
   */
  std::uint32_t condition = 0;

  /// Whether the left support peak is itself supported by an amino acid on its left.
  bool l_support = false;
};

// Forward declaration: lets the shared-pointer alias below name the class before its definition.
class SpOMSSpectrum;

/// Shared pointer to an immutable (const) SpOMSSpectrum.
using SpOMSSpectrumCsp = std::shared_ptr<const SpOMSSpectrum>;

/**
 * @brief Spectrum representation used by the SpecPeptidOMS algorithm.
 *
 * The class publicly derives from std::vector<specglob::ExperimentalSpectrumDataPoint>, so the
 * data points are directly accessible through the std::vector interface. On top of the points it
 * keeps, per amino acid code, a list of AaPosition entries.
 *
 * The class holds a `const pappso::AaCode &` member: the referenced AaCode object must outlive
 * every SpOMSSpectrum using it, and the reference member makes the class non copy-assignable
 * (copy construction is provided explicitly).
 */
class PMSPP_LIB_DECL SpOMSSpectrum : public std::vector<specglob::ExperimentalSpectrumDataPoint>
{
  public:
  /**
   * Preprocessing constructor
   * @param qmass_spectrum qualified mass spectrum to preprocess
   * @param precision_ptr Precision to use for preprocessing
   * @param aaCode amino acid code definition. NOTE(review): presumably bound to the m_aaCode
   * reference member, in which case the caller must keep it alive for the lifetime of this
   * object; confirm against the implementation.
   */
  SpOMSSpectrum(pappso::QualifiedMassSpectrum &qmass_spectrum,
                pappso::PrecisionPtr precision_ptr,
                const pappso::AaCode &aaCode);

  /**
   * Copy constructor
   * @param other SpOMSSpectrum to copy
   */
  SpOMSSpectrum(const SpOMSSpectrum &other);

  /**
   * Post-processing constructor
   * @brief Returns a copy of the provided spectrum accounting for the provided precursor mass error
   * @param other SpOMSSpectrum to copy
   * @param precursor_mass_error precursor mass error to account for
   */
  SpOMSSpectrum(const SpOMSSpectrum &other, double precursor_mass_error);

  /**
   * Destructor
   */
  virtual ~SpOMSSpectrum();

  /**
   * @brief Adds an amino acid position to the data structure
   * @param aa Amino acid to add to the data structure
   * @param r_peak index of the amino acid's right support peak
   * @param l_peak index of the amino acid's left support peak, if it is supported by an amino acid,
   * otherwise see next_l_peak
   * @param next_l_peak index of the first supported peak to the left of the amino acid's left
   * support peak
   * @param l_support indicates whether the amino acid's left support peak is supported by an amino
   * acid
   */
  void addAaPosition(std::uint8_t aa,
                     const std::size_t r_peak,
                     const std::size_t l_peak,
                     const std::size_t next_l_peak,
                     bool l_support);

  /**
   * @brief Returns the list of aa_positions for a given amino acid code
   * @param aa_code Amino acid code for which to retrieve positions
   * @return reference to the stored list; it refers to data owned by this object
   */
  const std::vector<AaPosition> &getAaPositions(std::uint8_t aa_code) const;

  /**
   * @brief Returns the list of aa_positions for a given amino acid, except those relying on
   * provided peaks
   * @param aa_code Amino acid code for which to retrieve positions
   * @param peaks_to_remove peaks whose dependent positions are left out of the result.
   * NOTE(review): taken by non-const reference; whether it is modified cannot be told from this
   * header.
   * @return a new list (returned by value)
   */
  std::vector<AaPosition> getAaPositions(std::uint8_t aa_code,
                                         std::vector<std::size_t> &peaks_to_remove) const;

  /**
   * @brief Returns the spectrum's list of masses
   */
  std::vector<double> getMassList() const;

  /**
   * @brief Returns the type of one of the spectrum's peaks
   * @param indice Peak index to be identified
   */
  specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const;

  /**
   * @brief Returns the spectrum's precursor's charge
   */
  unsigned int getPrecursorCharge() const; // same type as the non-standard `uint` typedef

  /**
   * @brief Returns the spectrum's precursor mass
   * NOTE(review): presumably consistent with the value used by getMissingMass(); confirm against
   * the implementation.
   */
  double getPrecursorMass() const;

  /**
   * @brief Returns the missing mass between a peak and the precursor's mass (shift at the end).
   * @param peak index of the peak
   */
  double getMissingMass(std::size_t peak) const;

  /**
   * @brief Returns the mz difference between two peaks
   * @param l_peak left peak
   * @param r_peak right peak
   */
  double getMZShift(std::size_t l_peak, std::size_t r_peak) const;

  /**
   * @brief Returns the complementary peak associated with a given peak
   * @param peak index of the peak
   * NOTE(review): presumably a lookup into m_complementary_peak_indexes; confirm against the
   * implementation.
   */
  std::size_t getComplementaryPeak(std::size_t peak) const;

  private:
  pappso::QualifiedMassSpectrum m_qualifiedMassSpectrum;
  std::vector<std::shared_ptr<std::vector<AaPosition>>> m_aapositions;
  pappso::PrecisionPtr m_precision_ptr;
  std::vector<std::shared_ptr<std::vector<std::uint8_t>>> m_supported_peaks;
  std::vector<int> m_reindexed_peaks; //  Index of supported peaks after removing unsupported peaks;
                                      //  -1 if unsupported
  const pappso::AaCode &m_aaCode;     //  Non-owning reference: the AaCode must outlive this object
  std::vector<std::size_t> m_complementary_peak_indexes;
  double m_precursor_mass_error;

  /**
   * @brief Preprocess the spectrum
   */
  void preprocessSpectrum(); // TODO : min number of found amino acids

  /**
   * @brief Removes the unsupported peaks (without an amino acid to the left) from the spectrum
   */
  void removeUnsupportedMasses();

  /**
   * @brief Computes the "condition" integer, used to apply the three peaks rule
   * @param l_peak left peak, given as its original index (before reindexing)
   * @param l_support whether the left peak is supported by an amino acid
   */
  std::uint32_t computeCondition(const std::size_t l_peak,
                                 bool l_support) const; //  l_peak is original index

  /**
   * @brief Add a peak to the supported peaks list
   * @param peak index of the peak to add
   */
  void addSupportedPeak(std::size_t peak);

  /**
   * @brief Reindexes the peaks after removal of the unsupported peaks
   */
  void correctPeakIndexes();

  /**
   * @brief For each point of the spectrum, indicate the index of its complementary peak
   */
  void fillComplementaryPeakIndexes();
};

} // namespace specpeptidoms
} // namespace pappso
