/*
 * 	train_utils.h
 *
 *  Created on: 17 May 2011
 *      Author: torsten
 */

#ifndef TRAIN_UTILS_H_
#define TRAIN_UTILS_H_

#ifndef GRAPH_TYPES_H_
#include <graph_types.h>
#endif // GRAPH_TYPES_H_

#ifndef	_CTYPE_H
#include <ctype.h>
#endif // _CTYPE_H

#ifndef	_SYS_TYPES_H
#include <sys/types.h>
#endif // _SYS_TYPES_H

#ifndef TRAIN_TYPES_H_
#include "train_types.h"
#endif // TRAIN_TYPES_H_

#ifndef FEA_HISTOGRAM_H_
#include <fea/fea_histogram.h>
#endif // FEA_HISTOGRAM_H_

#ifndef _BACKWARD_HASH_MAP
#include <ext/hash_map>
#endif // _BACKWARD_HASH_MAP

#ifndef BOOST_FILESYSTEM_FILESYSTEM_HPP
#include <boost/filesystem.hpp>
#endif // BOOST_FILESYSTEM_FILESYSTEM_HPP

#ifndef _GLIBCXX_STRING
#include <string>
#endif // _GLIBCXX_STRING

#ifndef _GLIBCXX_VECTOR
#include <vector>
#endif // _GLIBCXX_VECTOR

#ifndef _STDIO_H
#include <stdio.h>
#endif // _STDIO_H

// Makes every name of the GNU extension namespace __gnu_cxx (e.g. hash_map
// from <ext/hash_map>) visible through std:: in all files including this header.
// NOTE(review): user code extending namespace std is not sanctioned by the C++
// standard, and this leaks into every includer — confirm which callers still
// rely on std::hash_map before touching it.
namespace std { using namespace __gnu_cxx; }

namespace	Training	{

	/**
	 * Enumerates two-character strings "<s><t>": a fixed first character `s`
	 * followed by a running second character `t`.
	 *
	 * `t` starts at 48 and every Shift() advances it to the next alphabetic
	 * character code that differs from `s`. Once `t` reaches `run` (123) the
	 * enumeration is exhausted, which operator() reports by returning false.
	 *
	 * The object can be driven as a loop:
	 *   for (PermuteLetters p(c, 0); p(); p.Shift()) { ... p.get() ... }
	 */
	struct	PermuteLetters	{

		int32_t s, t, run;

		/**
		 * @param i  character code of the fixed first character.
		 * @param r  currently ignored; `run` is always initialised to 123.
		 *           NOTE(review): possibly meant to seed `run` — confirm with callers.
		 */
				PermuteLetters	(u_int32_t i, u_int32_t r):s(i),t(48),run(123)
									{  (void)r;  }

		/**
		 * Advances `t` to the next alphabetic code that is not equal to `s`.
		 *
		 * The search is bounded by `run`: without that bound the loop walks
		 * past the last letter and keeps feeding isalpha() values that are not
		 * representable as unsigned char, which is undefined behaviour and in
		 * practice never terminates.
		 */
		void	Shift			()	{
			do {
				++t;
			} while (t < run && (!isalpha(t) || s == t));
		}

		/** @return true while the running character has not reached `run`. */
		bool	operator()		()	const {
			return (t < run);
		}

		/** @return the two-character string made of `s` followed by `t`. */
		std::string
				get				()	const {
			std::string str(1, static_cast<char>(s));
			str += static_cast<char>(t);
			return str;
		}
	};

	// Shorthand for a list of boost::filesystem paths.
	typedef std::vector<boost::filesystem::path> vec;

	/**
	 * Common base class of the feature loaders declared in this header.
	 *
	 * It exposes a virtual Read_Features() entry point and two virtual hooks,
	 * Read_Value() and Assign_Feature(), whose default implementations here do
	 * nothing (Read_Value() returns 0). All members without an inline body are
	 * defined outside this header.
	 */
	class	NodeFeatureLoader	{
	protected:
		/* Immutable flag; presumably set from the constructor argument
		 * (the constructor is defined elsewhere — confirm).				*/
		const bool	m_forget_first_line;

		/* Fixed 1024-byte character buffer.
		 * NOTE(review): presumably line scratch space for the reader.		*/
		char		m_B [1024];

		/** @return the value of m_forget_first_line. Const-qualified so that
		 *  const members of derived loaders may call it.					*/
		bool		ForgetFirstLine()	const
		{	return m_forget_first_line;	}

	public:
		/** The bool argument defaults to false. */
					NodeFeatureLoader
									(bool=false);

		virtual		~NodeFeatureLoader
									();

		/**
		 * @param log       NOTE(review): presumably a log tag/message — confirm.
		 * @param filename  NOTE(review): presumably the feature file to read — confirm.
		 * @return success flag as defined by the out-of-line implementation.
		 */
		virtual
		bool		Read_Features	(const char* log, const char* filename);

		/** Hook for derived loaders; the base version ignores the stream and returns 0. */
		virtual
		int			Read_Value		(FILE*)
		{  return 0;						}

		/** Hook for derived loaders; the base version leaves the node untouched. */
		virtual
		void		Assign_Feature	(GraphGen::Node<Types::Node>&)
		{  }

		/** Static helper operating on a C stream; defined elsewhere. */
		static
		bool		readline		(FILE* f);

		/** Static logging helper taking two C strings; defined elsewhere. */
		static
		void		LogFn			(const char* log, const char* fn);

	};

	/**
	 * Loader derived from NodeFeatureLoader that always constructs its base
	 * with the flag set to true. Reads into vectors of double; the member
	 * functions are defined outside this header.
	 */
	class WeightsLoader : public NodeFeatureLoader
	{
	public:
		WeightsLoader()
			: NodeFeatureLoader(true)
		{
		}

		virtual ~WeightsLoader();

		/**
		 * Takes two C strings and two output vectors of double.
		 * NOTE(review): parameter meaning is defined by the implementation file.
		 */
		bool ReadValues(const char*, const char*, std::vector<double>&, std::vector<double>&);

	private:
		/**
		 * Helper working on an open stream `f`, a vector `w` and a count `no_w`.
		 * NOTE(review): presumably reads `no_w` values into `w` — confirm.
		 */
		bool ReadPortion(const char*, FILE* f, std::vector<double>& w, const size_t no_w);
	};

	/**
	 * Loader derived from NodeFeatureLoader that keeps two integer values
	 * (m_Label, m_TP_ref), both zero-initialised by the constructor.
	 * Overrides the Read_Value()/Assign_Feature() hooks; bodies live elsewhere.
	 */
	class LabellingLoader : public NodeFeatureLoader
	{
		/* Private constant with value 8. */
		enum {
			LabellingLoader_Accept_Object = 8
		};

		int m_Label;
		int m_TP_ref;

	public:
		/** Forwards `forget_first_line` to the base class constructor. */
		LabellingLoader(const bool forget_first_line)
			: NodeFeatureLoader(forget_first_line),
			  m_Label(0),
			  m_TP_ref(0)
		{
		}

		virtual int Read_Value(FILE* f);

		virtual void Assign_Feature(GraphGen::Node<Types::Node>&);

		/** Const query defined elsewhere. */
		bool Accept_Object() const;
	};

	/**
	 * Loader derived from NodeFeatureLoader holding a single float value
	 * (m_ConsVal), initialised to zero. Overrides the Read_Value() and
	 * Assign_Feature() hooks; their bodies are defined elsewhere.
	 */
	class ConserveLoader : public NodeFeatureLoader
	{
		float m_ConsVal;

	public:
		ConserveLoader()
			: NodeFeatureLoader(),
			  m_ConsVal(0.)
		{
		}

		virtual int Read_Value(FILE*);

		virtual void Assign_Feature(GraphGen::Node<Types::Node>&);
	};

	/**
	 * Loader derived from NodeFeatureLoader holding a single float value
	 * (m_EprosVal), initialised to zero. Overrides the Read_Value() and
	 * Assign_Feature() hooks; their bodies are defined elsewhere.
	 */
	class EprosLoader : public NodeFeatureLoader
	{
		float m_EprosVal;

	public:
		EprosLoader()
			: NodeFeatureLoader(),
			  m_EprosVal(0.)
		{
		}

		virtual int Read_Value(FILE*);

		virtual void Assign_Feature(GraphGen::Node<Types::Node>&);
	};

	/**
	 * Loader derived from NodeFeatureLoader holding two float values
	 * (m_Pssm_If, m_Pssm_NoIf), both initialised to zero. Overrides the
	 * Read_Value() and Assign_Feature() hooks; bodies are defined elsewhere.
	 */
	class PssmLoader : public NodeFeatureLoader
	{
		float m_Pssm_If;
		float m_Pssm_NoIf;

	public:
		PssmLoader()
			: NodeFeatureLoader(),
			  m_Pssm_If(0.),
			  m_Pssm_NoIf(0.)
		{
		}

		virtual int Read_Value(FILE*);

		virtual void Assign_Feature(GraphGen::Node<Types::Node>&);
	};

	/**
	 * Loader derived from NodeFeatureLoader holding a single float value
	 * (m_RgVal), initialised to zero. Overrides the Read_Value() and
	 * Assign_Feature() hooks; their bodies are defined elsewhere.
	 */
	class RgLoader : public NodeFeatureLoader
	{
		float m_RgVal;

	public:
		RgLoader()
			: NodeFeatureLoader(),
			  m_RgVal(0.)
		{
		}

		virtual int Read_Value(FILE*);

		virtual void Assign_Feature(GraphGen::Node<Types::Node>&);
	};

	/**
	 * Loader derived from NodeFeatureLoader holding a single float value
	 * (m_FeVal), initialised to zero. Overrides the Read_Value() and
	 * Assign_Feature() hooks; their bodies are defined elsewhere.
	 */
	class FeLoader : public NodeFeatureLoader
	{
		float m_FeVal;

	public:
		FeLoader()
			: NodeFeatureLoader(),
			  m_FeVal(0.)
		{
		}

		virtual int Read_Value(FILE*);

		virtual void Assign_Feature(GraphGen::Node<Types::Node>&);
	};

	/**
	 * Variant of PssmLoader that additionally keeps a node object, two 64-bit
	 * counters (m_Slot, m_Curr_Res; both start at 0) and a C stream handle
	 * (m_Fh; starts as NULL). Re-declares the Read_Value()/Assign_Feature()
	 * hooks; all bodies are defined elsewhere.
	 */
	class PssmLoaderV2 : public PssmLoader
	{
		GraphGen::Node<Types::Node> m_Node;		/* default-constructed			*/
		int64_t m_Slot;
		int64_t m_Curr_Res;
		FILE* m_Fh;

	public:
		PssmLoaderV2()
			: PssmLoader(),
			  m_Slot(0),
			  m_Curr_Res(0),
			  m_Fh(NULL)
		{
		}

		virtual int Read_Value(FILE*);

		virtual void Assign_Feature(GraphGen::Node<Types::Node>&);

	private:
		/* Internal helper; NOTE(review): presumably reads one PSSM block and
		 * assigns it, as the name suggests — confirm in the implementation.	*/
		bool Read_PssmBlockAnd_Assign();
	};

	/**
	 * Static-only utility class: every data member and member function is
	 * static. Holds the loaded nodes/edges and configuration values and
	 * exposes option setters plus the Run() entry point. Only Verbose() and
	 * MoreVerbose() are implemented inline; everything else is defined
	 * outside this header.
	 */
	class Utils
	{
		friend class NodeFeatureLoader;
		friend class PssmLoaderV2;

		/* States of the structure reader. */
		enum ReadStateStructure {
			Readstate_idle  = 0,
			Readstate_nodes = 1,
			Readstate_edges = 2
		};

		/* One distinct bit per loadable kind of data. */
		enum LoadObjectState {
			LoadObjectState_Idle             = 0x00,
			LoadObjectState_StructureAndRASA = 0x01,
			LoadObjectState_Labels           = 0x02,
			LoadObjectState_Consrv           = 0x04,
			LoadObjectState_Pssm             = 0x08,
			LoadObjectState_WeightVectors    = 0x10,
			LoadObjectState_Dists            = 0x20,
			LoadObjectState_Epros            = 0x40,
			LoadObjectState_Rg               = 0x80,
			LoadObjectState_Fe               = 0x100
		};

	private:
		static bool m_Verbose;
		static bool m_MoreVerbose;
		static bool m_Weights_Loaded;

		static ReadStateStructure m_ReadStateStruct;

		/* NOTE(review): presumably a bitwise combination of LoadObjectState values. */
		static int32_t m_LoadObject_State;

		static std::vector<GraphGen::Node<Types::Node> > m_Nodes;
		static std::vector<GraphGen::Node<Types::Node>*> m_Nodes_p;
		static std::vector<Types::Edge> m_Edges;

		/* Configuration: dynamic edge inclusion */
		static double m_Opt_DynmEdge_Threshold;

		/* Configuration: C \alpha backbone */
		static std::vector<size_t> m_Backbone;

		/* Configuration: graph and labels */
		static u_int64_t m_No_Nds;
		static u_int64_t m_No_Edges;
		static u_int64_t m_No_Labels;
		static u_int64_t m_Min_NodeId;
		static u_int64_t m_Max_NodeId;

		/* Configuration: file system */
		static std::string m_Dir;
		static std::string m_PDB_Object;
		static std::string m_Chain;
		static std::string m_Output_Dir;
		static std::string m_Prot_InFile;
		static std::string m_RgData_Dir;
		static std::string m_FeData_Dir;
		static std::string m_KyLabels_Dir;

		static Types::Cmd m_Curr_Cmd;

	public:
		/* ---- life cycle --------------------------------------------------- */
		static void Init();
		static void CleanUp();

		/* ---- directory / file configuration -------------------------------- */
		static void Set_FeaturesDIR(const char*);
		static void Set_ValidationDIR(const char*);
		static void Set_PredictionDIR(const char*);
		static void Set_RgData_Dir(const char*);
		static void Set_FeData_Dir(const char*);
		static void Set_KyLabels_Dir(const char*);
		static void Set_DataOutputDIR(const char*);
		static void SetInputFile_Proteins(const char* d);
		static void Set_OutputPerObject();

		/* ---- numeric configuration ------------------------------------------ */
		static void SetIntermediates(size_t);
		static void SetMaxLearnStepsPerSample(size_t);
		static void SetChunkSize(size_t n);
		static void SetLearnRounds_PerInput(size_t n);
		static void SetDynmEdge_Distance(double dist);

		/* ---- execution ------------------------------------------------------ */
		static void Run(Types::Cmd&);
		static void Load_Reference_Data(std::string&);

		/* LoadObjects_Sequentially - loads PDB objects step by step and
		 *                            fetches the associated reference data;
		 *                            implements a recursive depth-first search
		 *                            for entries
		 */
		static void LoadObjects_Sequentially(std::string dir);

		/* Both arguments default to constants of Training::Types::Edge. */
		static void SetEdgeModel(double alpha = Training::Types::Edge::EdgeModel__gii,
		                         double beta  = Training::Types::Edge::EdgeModel__gij);

		static void SetOpt_FP_Scale_inOLM(double scale_fp);
		static void SetOpt_FN_Scale_inOLM(double scale_fn);

		/* ---- verbosity ------------------------------------------------------ */
		/** @return the current value of m_Verbose. */
		static bool Verbose()
		{
			return m_Verbose;
		}

		/** @return the current value of m_MoreVerbose. */
		static bool MoreVerbose()
		{
			return m_MoreVerbose;
		}

		static void SetVerbose(bool v);
		static void SetMoreVerbose(bool v);

		/* ---- learning options ----------------------------------------------- */
		static void SetOpt_LearnFunctional_ROCcurve();
		static void SetOpt_LearnFunctional_ROCdiff();
		static void SetOpt_LearnFunctional_ROCset_1(double, double);
		static void SetOpt_LearnFunctional_ROCset_2(double, double);
		static void SetNoLearnEdgeModel(bool w);
		static void SetRun_InLogDomain(bool v);

		/* ---- feature options ------------------------------------------------ */
		static void SetOpt_ConservExt(bool opt);
		static void SetOpt_PssmExt(bool opt);
		static void SetOpt_BinFeatures(bool opt);
		static void SetOpt_RASAext(bool opt);
		static void SetOpt_Epros(bool opt);
		static void SetOpt_Rg(bool opt);
		static void SetOpt_Fe(bool opt);
		static void SetOpt_Rg_CfgData(bool cfg);
		static void SetOpt_Using_Exp_EnergyTerm(bool opt);
		static void SetOpt_InferWithGibbs(bool o);
		static void SetOpt_SetWeightZero_iff_Negative(bool o);

	private:
		/* ---- internal loading helpers (defined elsewhere) -------------------- */
		static bool Read_ObjectAnd_rASA(const std::string& rasa_fn);

		static void Load_Object(const std::string& dir,
		                        const std::string& o,
		                        const std::string& prim_c,
		                        const std::string& sec_ch);

		static void LoadObjects_FromFile();

		static int64_t CheckSyntax_AndAssign_NodeId(const char* end);

		static bool Reading_DynamicEdge(const char* b, int64_t& id1, int64_t& id2);

		static bool Loading_DynamicEdges(const std::string& fn);

		static void Append_C_alpha_BackBone();

		static void DoInputSource();
	};

}	// Training

#endif // TRAIN_UTILS_H_
