/* GNU polyxmass - the massist's program.
   -------------------------------------- 
   Copyright (C) 2000,2001,2002,2003,2004 Filippo Rusconi

   http://www.polyxmass.org

   This file is part of the "GNU polyxmass" project.
   
   The "GNU polyxmass" project is an official GNU project package (see
   www.gnu.org) released ---in its entirety--- under the GNU General
   Public License and was started at the Centre National de la
   Recherche Scientifique (FRANCE), that granted me the formal
   authorization to publish it under this Free Software License.

   This software is free software; you can redistribute it and/or
   modify it under the terms of the GNU  General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.
   
   You should have received a copy of the GNU  General Public
   License along with this software; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

#ifndef PXMCHEM_FRAGSPEC_H
#define PXMCHEM_FRAGSPEC_H

#include "libpolyxmass-globals.h"
#include "pxmchem-polchemdef.h"




#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */


/* Forward typedefs: give the two structures defined further down in
   this header their short type names, so that the prototypes below
   (and each structure's users) can write PxmFragSpec / PxmFragRule
   instead of struct _PxmFragSpec / struct _PxmFragRule.
 */
typedef struct _PxmFragSpec PxmFragSpec;
typedef struct _PxmFragRule PxmFragRule;


/* The PxmFragSpec struct is the structure that holds enough
   information to make fragments out of a polymer sequence.
   Fragmentations are a process by which a polymer can be
   broken into parts, usually through the use of high energies.
   The fragmentation pattern is something that is dependent on 
   the nature of the polymer, and more specifically on the
   chemical nature of the monomers that compose the polymer.
   Usually the fragmentation occurs at very specific points in the
   polymer, especially at different points in the bond that
   links two consecutive monomers (this bond is usually weak with
   respect to the bonds that make the body of a monomer). If
   the energy applied to the polymer is high, the polymer might
   begin to break inside some particularly "weak/labile" monomers,
   as is the case for saccharides, for example.
  
   This structure (with the fragrule one) is an attempt to give to 
   the user the ability to model any kind of fragmentation by
   specifying a number of characteristics of the fragmentation process. 
 */
struct _PxmFragSpec
{
  /* name is the string that names this fragmentation specification.
     NOTE(review): presumably the key used by the *_by_name functions
     declared in this header -- confirm in the implementation.
   */
  gchar *name;

  /* end indicates whether the fragment contains the left end or the
     right end of the polymer. It may have one of these values:

     PXM_FRAG_END_NONE, PXM_FRAG_END_LEFT or PXM_FRAG_END_RIGHT

     Attention: PXM_FRAG_END_LEFT and PXM_FRAG_END_RIGHT may not be
     ORed together (that is, PXM_FRAG_END_BOTH is prohibited).

     When the fragmentation occurs, usually one of the fragments
     is not charged, thus not visible by the mass spec detector.
     This member indicates on which fragment the charge sits, thus
     which fragment is seen by the mass spec.

     PXM_FRAG_END_LEFT stands for the left end, and so
     forth. PXM_FRAG_END_NONE is a special case where there is
     actually no fragment that encompasses one or the other cap (each
     fragment is set apart, totally isolated from the previous/next
     monomer in the polymer). See the case of immonium ions for
     proteins. This member is necessary to know which capping
     chemistry should be applied upon computation of the fragment
     mass/formula. Indeed -for a protein-, as an example, if end is
     PXM_FRAG_END_LEFT, then the proton is the chemical cap. If it
     were PXM_FRAG_END_RIGHT, then the OH cap would be added,
     since proteins are hydroxyl-capped on their right end.
   */
  PxmFragEnd end;
  
  /* actform is an allocated string of unlimited size. actform
     provides a way to describe what is to be performed -as a chemical
     computation- beyond simply adding the formulae/masses of the
     monomers according to the end member above. For example, for a
     protein, when calculating a_10 (fragment a, monomer 10), we
     would compute the masses using the ten first monomers, but then
     we would have to remove the mass/formula of COOH with respect to
     the mass computed for the equivalent oligomer. This is what
     actform is for. In some cases no chemical reaction is needed to
     model a fragment; actform may then be NULL.
   */
  gchar *actform;

  /* comment is an allocated string the user might want to enter to
     explain, or be reminded later of, some special features of this
     fragspec. This string may be NULL if no comment is supplied by
     the user.
   */
  gchar *comment;

  /* fgrGPA is the array in which all the PxmFragRule descriptions
     belonging to this fragspec are stored (see struct _PxmFragRule).
   */
  GPtrArray * fgrGPA;
};


/* The PxmFragRule struct is the structure that holds enough
   information to precisely generate fragments when the
   fragmentation pattern is dependent on the nature of 
   the monomer where fragmentation occurs, optionally allowing
   to define conditions on the fragmentation based on the
   nature of the monomers prior and/or after the one where fragmentation
   occurs.
 */
struct _PxmFragRule
{
  /* name is a string of unlimited size that holds the name of the
     fragrule. This name cannot be repeated in two fragrules belonging
     to the same fragspec, while two or more fragrules can have the
     same value for the member that holds the current monomer code.
   */
  gchar *name;

  /* prev is a string of unlimited size that holds a monomer
     code. This string must be a valid code in the current polymer
     context. This code describes which monomer must be before the one
     currently fragmented for the actform to be applied. This can be
     NULL, in which case it is not used for testing the condition.
   */
  gchar *prev;

  /* This member is a string of unlimited size that holds a monomer
     code. This string must be a valid code in the current polymer
     context. This code describes the monomer currently
     fragmented. This string may be NULL, in which case the fragrule
     loses its power.

     The member is called `this' in C. Because `this' is a reserved
     keyword in C++, and this header is meant to be usable from C++
     (see the extern "C" guards), the very same member is exposed
     under the name `c_this' when compiled as C++. Type, position and
     size are identical in both cases, so the structure layout does
     not change and existing C code is unaffected.
   */
#ifdef __cplusplus
  gchar *c_this;
#else
  gchar *this;
#endif

  /* next is a string of unlimited size that holds a monomer
     code. This string must be a valid code in the current polymer
     context. This code describes which monomer must be after the one
     currently fragmented for the actform to be applied. This can be
     NULL, in which case it is not used for testing the condition.
   */
  gchar *next;

  /* actform is a string of unlimited size that holds an actform. This
     actform is applied only if the following conditions are met:

     - the monomer currently fragmented has the code held in the
       current-monomer member above;

     - the previous monomer in the polymer sequence has the code held
       in prev, if prev is non-NULL;

     - the next monomer in the polymer sequence has the code held in
       next, if next is non-NULL.

     Note that the notion of prev/next depends on the direction
     of the fragmentation. For example, the following polymer sequence
     le-ABCDEFGHIGKLM-re (le is the left end cap and re the right end
     cap) can be fragmented from the left end to the right end, if the
     end member of PxmFragSpec is PXM_FRAG_END_LEFT:

     le-A
     le-AB
     le-ABC
     ...

     But also from the right end to the left end, if the end member
     of PxmFragSpec is PXM_FRAG_END_RIGHT:

     M-re
     LM-re
     KLM-re
     ...

     Let us say that in the first (left end) case we are fragmenting
     at monomer B (fragment le-AB). In this case prev is A, and next
     is C.

     Let us say that in the second (right end) case we are fragmenting
     at monomer L (fragment LM-re). In this case prev is M, and next
     is K.

     actform may not be NULL, otherwise the whole fragrule would make
     no sense.
   */
  gchar *actform;

  /* comment is an allocated string the user might want to enter to
     explain, or be reminded later of, some special features of
     this fragrule. This string can be NULL.
   */
  gchar *comment;
};



/* NEW'ING FUNCTIONS, DUPLICATING FUNCTIONS ...

   All functions in this group return a pointer to a PxmFragSpec or
   to a PxmFragRule. NOTE(review): the implementations are not visible
   from this header; presumably the returned object is newly allocated
   and NULL signals failure -- confirm in the .c file.
 */

/* Takes no argument; returns a PxmFragSpec pointer.
 */
PxmFragSpec *
pxmchem_fragspec_new (void);

/* Takes no argument; returns a PxmFragRule pointer.
 */
PxmFragRule *
pxmchem_fragrule_new (void);

/* Takes a name and an array; returns a PxmFragSpec pointer.
   NOTE(review): presumably GPA is searched for a fragspec called NAME
   and a new object is made from the match -- confirm.
 */
PxmFragSpec *
pxmchem_fragspec_new_by_name (gchar *name, GPtrArray *GPA);

/* Same shape as pxmchem_fragspec_new_by_name (), for PxmFragRule.
 */
PxmFragRule *
pxmchem_fragrule_new_by_name (gchar *name, GPtrArray *GPA);

/* Takes a fragspec that is not modified through the pointer (const);
   returns a PxmFragSpec pointer, presumably a copy of FGS.
 */
PxmFragSpec *
pxmchem_fragspec_dup (const PxmFragSpec *fgs);

/* Takes a fragrule that is not modified through the pointer (const);
   returns a PxmFragRule pointer, presumably a copy of FGR.
 */
PxmFragRule *
pxmchem_fragrule_dup (const PxmFragRule *fgr);


/* PxmFragSpec member setters. Each one takes the fragspec to modify
   and the value for the member named in the function name, and
   returns a gboolean.
   NOTE(review): the implementations are not visible from this header;
   presumably TRUE means success and string arguments are copied
   rather than adopted -- confirm in the .c file.
 */
gboolean
pxmchem_fragspec_set_name (PxmFragSpec *fgs, gchar *name);

gboolean
pxmchem_fragspec_set_end (PxmFragSpec *fgs, PxmFragEnd end);

gboolean
pxmchem_fragspec_set_actform (PxmFragSpec *fgs, gchar *actform);

gboolean
pxmchem_fragspec_set_comment (PxmFragSpec *fgs, gchar *comment);


/* PxmFragRule setters for the name and prev members. Each one takes
   the fragrule to modify and the string for the member named in the
   function name, and returns a gboolean.
   NOTE(review): presumably TRUE means success and the string is
   copied rather than adopted -- confirm in the .c file.
 */
gboolean
pxmchem_fragrule_set_name (PxmFragRule *fgr, gchar *name);

gboolean
pxmchem_fragrule_set_prev (PxmFragRule *fgr, gchar *prev);


/* Takes the fragrule to modify and a monomer code string; returns a
   gboolean. Presumably sets the member of FGR that holds the code of
   the monomer currently fragmented -- confirm in the .c file.

   The second parameter is deliberately not named `this': that word
   is a reserved keyword in C++, and this header is meant to be usable
   from C++ (see the extern "C" guards). Parameter names in a
   prototype are not part of the function's type, so callers and the
   function definition are unaffected by this name.
 */
gboolean
pxmchem_fragrule_set_this (PxmFragRule *fgr, gchar *this_code);


/* PxmFragRule setters for the next, actform and comment members. Each
   one takes the fragrule to modify and the string for the member
   named in the function name, and returns a gboolean.
   NOTE(review): presumably TRUE means success and the string is
   copied rather than adopted -- confirm in the .c file.
 */
gboolean
pxmchem_fragrule_set_next (PxmFragRule *fgr, gchar *next);

gboolean
pxmchem_fragrule_set_actform (PxmFragRule *fgr, gchar *actform);

gboolean
pxmchem_fragrule_set_comment (PxmFragRule *fgr, gchar *comment);




/* INTEGRITY CHECKING FUNCTIONS

   All functions in this group return a gboolean.
   NOTE(review): the implementations are not visible from this header;
   presumably TRUE means "valid" / "unique" -- confirm in the .c file.
 */

/* Takes a fragspec, a polymer chemistry definition and the address of
   a string pointer.
   NOTE(review): presumably FRAGSPEC is checked against POLCHEMDEF and
   *VALID receives a textual report -- confirm, including who owns the
   string.
 */
gboolean
pxmchem_fragspec_validate (PxmFragSpec *fragspec,
                           PxmPolchemdef *polchemdef,
                           gchar **valid);

/* Takes a fragspec and an array.
   NOTE(review): presumably tells whether the name of FRAGSPEC occurs
   only once in GPA -- confirm.
 */
gboolean
pxmchem_fragspec_unique_by_name (PxmFragSpec *fragspec, GPtrArray *GPA);

/* Same shape as pxmchem_fragspec_validate (), for a PxmFragRule.
 */
gboolean
pxmchem_fragrule_validate (PxmFragRule *fragrule,
                           PxmPolchemdef *polchemdef,
                           gchar **valid);

/* Same shape as pxmchem_fragspec_unique_by_name (), for a PxmFragRule.
 */
gboolean
pxmchem_fragrule_unique_by_name (PxmFragRule *fragrule, GPtrArray *GPA);




/* LOCATING FUNCTIONS

   The get_index functions return a gint and the get_ptr functions
   return a pointer to the object type named in the function name.
   NOTE(review): the implementations are not visible from this header;
   the value returned when nothing is found (presumably -1 for the
   index functions and NULL for the pointer functions) must be
   confirmed in the .c file.
 */

/* Look-up by name: take a name and the array to look into.
   NOTE(review): the _top/_bottom variants presumably differ in the
   end of the array from which the search starts -- confirm.
 */
gint
pxmchem_fragspec_get_index_by_name (gchar *name, GPtrArray *GPA);

gint
pxmchem_fragspec_get_index_top_by_name (gchar *name, GPtrArray *GPA);

gint
pxmchem_fragspec_get_index_bottom_by_name (gchar *name, GPtrArray *GPA);

gint
pxmchem_fragrule_get_index_by_name (gchar *name, GPtrArray *GPA);

gint
pxmchem_fragrule_get_index_top_by_name (gchar *name, GPtrArray *GPA);

gint
pxmchem_fragrule_get_index_bottom_by_name (gchar *name, GPtrArray *GPA);

/* Look-up by pointer: take the array to look into and the object
   pointer. Note that the argument order (array first) is the reverse
   of the by-name functions.
 */
gint
pxmchem_fragspec_get_index_by_ptr (GPtrArray *GPA, PxmFragSpec *fgs);

gint
pxmchem_fragrule_get_index_by_ptr (GPtrArray *GPA, PxmFragRule *fgr);

/* Look-up by name returning an object pointer instead of an index.
   NOTE(review): presumably the returned pointer refers to the item
   stored in GPA, not to a copy -- confirm.
 */
PxmFragSpec *
pxmchem_fragspec_get_ptr_by_name (gchar *name, GPtrArray *GPA);

PxmFragRule *
pxmchem_fragrule_get_ptr_by_name (gchar *name, GPtrArray *GPA);



/* UTILITY FUNCTIONS
 */



/* XML-format TRANSACTIONS

   The format functions take an object, an indentation string and an
   offset, and return a gchar pointer. The render functions take an
   XML document, an XML node and an untyped user data pointer, and
   return a pointer to the object type named in the function name.
   NOTE(review): the implementations are not visible from this header;
   presumably the format functions return a newly allocated string
   holding the XML text for the object, and the render functions build
   a new object from XML_NODE -- confirm in the .c file, including
   ownership of the returned values and the meaning of OFFSET and
   USER_DATA.
 */
gchar *
pxmchem_fragspec_format_xml_string_fgs (PxmFragSpec *fgs,
                                        gchar *indent,
                                        gint offset);

PxmFragSpec *
pxmchem_fragspec_render_xml_node_fgs (xmlDocPtr xml_doc,
                                      xmlNodePtr xml_node,
                                      gpointer user_data);

gchar *
pxmchem_fragrule_format_xml_string_fgr (PxmFragRule *fgr,
                                        gchar *indent,
                                        gint offset);

PxmFragRule *
pxmchem_fragrule_render_xml_node_fgr (xmlDocPtr xml_doc,
                                      xmlNodePtr xml_node,
                                      gpointer user_data);



/* FREE'ING FUNCTIONS

   Each function takes a pointer to the object named in the function
   name and returns a gboolean.
   NOTE(review): the implementations are not visible from this header;
   presumably the object and the strings it holds are released and
   TRUE means success -- confirm in the .c file.
 */
gboolean
pxmchem_fragspec_free (PxmFragSpec *fgs);

gboolean
pxmchem_fragrule_free (PxmFragRule *fgr);



/* GPtrArray-RELATED FUNCTIONS

   Each function takes an array and returns a gint.
   NOTE(review): the implementations are not visible from this header;
   presumably GPA holds objects of the type named in the function
   name, all of them are freed along with the array, and the returned
   value is a count of freed items -- confirm in the .c file.
 */
gint
pxmchem_fragspec_GPA_free (GPtrArray *GPA);

gint
pxmchem_fragrule_GPA_free (GPtrArray *GPA);




#ifdef __cplusplus
}
#endif /* __cplusplus */


#endif /* PXMCHEM_FRAGSPEC_H */
