/*
 * 	train_utils.cpp
 *
 *  Created on: 17 May 2011
 *      Author: torsten
 */

#include "train_utils.h"
#include "train_types.h"
#include "train_labelling.h"
#include <graph_types.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <math.h>
#include <ctype.h>
#include <string>
#include <ext/hash_map>
#include <vector>
#include <fstream>
#include <iterator>
#include <algorithm>
#include <limits.h>
#include <boost/filesystem.hpp>
#include <boost/regex.hpp>

using namespace boost::filesystem;

/* Debug logging helpers.
 *
 * With DEBUG_TRAIN_UTILS or DEBUG_TRAIN_UTILS_INPUT defined, every macro
 * formats its arguments into the file-local buffer log_txt and passes the
 * buffer to train_utils_log() when the matching verbosity switch is on
 * (Utils::Verbose() for the one- and two-argument forms, Utils::MoreVerbose()
 * for the others).  Without those defines every macro expands to an empty
 * block.
 *
 * Each expansion is ONE brace-enclosed statement.  A call may therefore be
 * written with or without a trailing semicolon and may be the sole body of an
 * un-braced `if`; previously such an `if` guarded only the formatting (debug
 * build) or swallowed the following statement (non-debug build).
 * Formatting is bounded by the size of log_txt.
 * Do not put a semicolon between a call and a following `else`.
 */
#if defined(DEBUG_TRAIN_UTILS) || defined(DEBUG_TRAIN_UTILS_INPUT)
extern
void	train_utils_log	(char*);
static
char	log_txt	[16 * 1024];
#define	LOG_TRAIN_UTILS(s) { snprintf(log_txt,sizeof(log_txt),s); \
							if(Utils::Verbose()) train_utils_log(log_txt); }
#define	LOG_TRAIN_UTILS_2(s,t) { snprintf(log_txt,sizeof(log_txt),s,t); \
							if(Utils::Verbose()) train_utils_log(log_txt); }
#define	LOG_TRAIN_UTILS_3(s,t,u) { snprintf(log_txt,sizeof(log_txt),s,t,u); \
							if(Utils::MoreVerbose()) train_utils_log(log_txt); }
#define	LOG_TRAIN_UTILS_4(s,t,u,v) { snprintf(log_txt,sizeof(log_txt),s,t,u,v); \
							if(Utils::MoreVerbose()) train_utils_log(log_txt); }
#define	LOG_TRAIN_UTILS_5(s,t,u,v,w) { snprintf(log_txt,sizeof(log_txt),s,t,u,v,w); \
							if(Utils::MoreVerbose()) train_utils_log(log_txt); }
#define	LOG_TRAIN_UTILS_6(s,t,u,v,w,x) { snprintf(log_txt,sizeof(log_txt),s,t,u,v,w,x); \
							if(Utils::MoreVerbose()) train_utils_log(log_txt); }
#else
#define	LOG_TRAIN_UTILS(s) {}
#define	LOG_TRAIN_UTILS_2(s,t) {}
#define	LOG_TRAIN_UTILS_3(s,t,u) {}
#define	LOG_TRAIN_UTILS_4(s,t,u,v) {}
#define	LOG_TRAIN_UTILS_5(s,t,u,v,w) {}
#define	LOG_TRAIN_UTILS_6(s,t,u,v,w,x) {}
#endif // DEBUG_TRAIN_UTILS || DEBUG_TRAIN_UTILS_INPUT

namespace Training	{

/* ---- Definitions of the static data members of Utils ---- */

/* Command being executed; stays idle until Run() assigns it. */
Types::Cmd Utils::m_Curr_Cmd = Types::Cmd_Idle;

/* Counters and node-id range of the object currently held in memory. */
u_int64_t Utils::m_No_Nds = 0;
u_int64_t Utils::m_No_Edges = 0;
u_int64_t Utils::m_No_Labels = 2;
u_int64_t Utils::m_Min_NodeId = ULLONG_MAX;
u_int64_t Utils::m_Max_NodeId = 0;

/* Directories, current object/chain and the optional protein list file. */
std::string Utils::m_Dir;
std::string Utils::m_PDB_Object;
std::string Utils::m_Chain;
std::string Utils::m_RgData_Dir;
std::string Utils::m_FeData_Dir;
std::string Utils::m_KyLabels_Dir;
std::string Utils::m_Output_Dir;
std::string Utils::m_Prot_InFile;

/* Parser state of Read_ObjectAnd_rASA(). */
Utils::ReadStateStructure Utils::m_ReadStateStruct = Utils::Readstate_idle;

/* Bit set of the LoadObjectState_* flags collected by Load_Object(). */
int32_t Utils::m_LoadObject_State = 0;

/* Node storage, and a table of node pointers indexed by node id. */
std::vector<GraphGen::Node<Types::Node> > Utils::m_Nodes;
std::vector<GraphGen::Node<Types::Node>*> Utils::m_Nodes_p;

/* m_Backbone[n] != 0 marks that an edge between ids n and n+1 was read. */
std::vector<size_t> Utils::m_Backbone;

/* Edge storage; the first m_No_Edges entries are in use. */
std::vector<Types::Edge> Utils::m_Edges;

/* Edges with a distance above this threshold are not loaded. */
double Utils::m_Opt_DynmEdge_Threshold = 7.0;

/* Verbosity switches, and whether the weight vectors were read already. */
bool Utils::m_Verbose = false;
bool Utils::m_MoreVerbose = false;
bool Utils::m_Weights_Loaded = false;

/* Stores the verbose switch and passes the same value on to the classifier. */
void Utils::SetVerbose(bool v)
{
	m_Verbose = v;
	Classifier::SetVerbose(v);
}

/* Stores the more-verbose switch and passes the same value on to the classifier. */
void Utils::SetMoreVerbose(bool v)
{
	m_MoreVerbose = v;
	Classifier::SetMoreVerbose(v);
}

/* Sets the distance threshold compared against each edge in Loading_DynamicEdges(). */
void Utils::SetDynmEdge_Distance(double dist)
{
	m_Opt_DynmEdge_Threshold = dist;
}

/* Forwards to Classifier::SetIntermediates(). */
void Utils::SetIntermediates(size_t no)
{
	Classifier::SetIntermediates(no);
}

/* Forwards to Classifier::SetMaxLearnStepsPerSample(). */
void Utils::SetMaxLearnStepsPerSample(size_t no)
{
	Classifier::SetMaxLearnStepsPerSample(no);
}

/* Forwards to Classifier::SetLearnRounds_PerInput(); Run() reads the value back for Cmd_Learn. */
void Utils::SetLearnRounds_PerInput(size_t n)
{
	Classifier::SetLearnRounds_PerInput(n);
}

/* Forwards to Classifier::SetChunkSize(). */
void Utils::SetChunkSize(size_t n)
{
	Classifier::SetChunkSize(n);
}

/* Forwards to Classifier::SetOpt_ConservExt(). */
void Utils::SetOpt_ConservExt(bool opt)
{
	Classifier::SetOpt_ConservExt(opt);
}

/* Forwards to Classifier::SetOpt_PssmExt(). */
void Utils::SetOpt_PssmExt(bool opt)
{
	Classifier::SetOpt_PssmExt(opt);
}

/* Forwards to Classifier::SetOpt_RASAext(). */
void Utils::SetOpt_RASAext(bool opt)
{
	Classifier::SetOpt_RASAext(opt);
}

/* Forwards to Classifier::SetOpt_BinFeatures(). */
void Utils::SetOpt_BinFeatures(bool opt)
{
	Classifier::SetOpt_BinFeatures(opt);
}

/* Forwards to Classifier::SetOpt_Epros(). */
void Utils::SetOpt_Epros(bool opt)
{
	Classifier::SetOpt_Epros(opt);
}

/* Forwards to Classifier::SetOpt_Rg(). */
void Utils::SetOpt_Rg(bool opt)
{
	Classifier::SetOpt_Rg(opt);
}

/* Forwards to Classifier::SetOpt_Fe(). */
void Utils::SetOpt_Fe(bool opt)
{
	Classifier::SetOpt_Fe(opt);
}

/* Forwards to Classifier::SetOpt_Rg_CfgData(). */
void Utils::SetOpt_Rg_CfgData(bool cfg)
{
	Classifier::SetOpt_Rg_CfgData(cfg);
}

/* Forwards to Classifier::SetOpt_InferWithGibbs(). */
void Utils::SetOpt_InferWithGibbs(bool o)
{
	Classifier::SetOpt_InferWithGibbs(o);
}

/* Forwards to Classifier::SetOpt_SetWeightZero_iff_Negative(). */
void Utils::SetOpt_SetWeightZero_iff_Negative(bool o)
{
	Classifier::SetOpt_SetWeightZero_iff_Negative(o);
}

/* Forwards to Classifier::SetOpt_Using_Exp_EnergyTerm(). */
void Utils::SetOpt_Using_Exp_EnergyTerm(bool cfg)
{
	Classifier::SetOpt_Using_Exp_EnergyTerm(cfg);
}

/* Switches the option on: calls Classifier::SetOpt_LearnFunctional_ROCcurve(true). */
void Utils::SetOpt_LearnFunctional_ROCcurve()
{
	Classifier::SetOpt_LearnFunctional_ROCcurve(true);
}

/* Switches the option on: calls Classifier::SetOpt_LearnFunctional_ROCdiff(true). */
void Utils::SetOpt_LearnFunctional_ROCdiff()
{
	Classifier::SetOpt_LearnFunctional_ROCdiff(true);
}

/* Forwards both values, in the same order, to Classifier::SetOpt_LearnFunctional_ROCset_1(). */
void Utils::SetOpt_LearnFunctional_ROCset_1(double fpr, double tpr)
{
	Classifier::SetOpt_LearnFunctional_ROCset_1(fpr,tpr);
}

/* Forwards both values, in the same order, to Classifier::SetOpt_LearnFunctional_ROCset_2(). */
void Utils::SetOpt_LearnFunctional_ROCset_2(double fpr, double tpr)
{
	Classifier::SetOpt_LearnFunctional_ROCset_2(fpr,tpr);
}

/* Forwards to Classifier::SetOpt_FP_Scale_inOLM(). */
void Utils::SetOpt_FP_Scale_inOLM(double scale_fp)
{
	Classifier::SetOpt_FP_Scale_inOLM(scale_fp);
}

/* Forwards to Classifier::SetOpt_FN_Scale_inOLM(). */
void Utils::SetOpt_FN_Scale_inOLM(double scale_fn)
{
	Classifier::SetOpt_FN_Scale_inOLM(scale_fn);
}

/* Forwards both parameters, in the same order, to Classifier::SetEdgeModel(). */
void Utils::SetEdgeModel(double alpha, double beta)
{
	Classifier::SetEdgeModel(alpha,beta);
}

/* Forwards to Classifier::SetRun_InLogDomain(). */
void Utils::SetRun_InLogDomain(bool v)
{
	Classifier::SetRun_InLogDomain(v);
}

/* Init - prepares a run of the command stored in m_Curr_Cmd.
 *
 * Uses m_Dir as output directory when none was configured, truncates (or
 * creates) the two per-command result lists in that directory, seeds rand()
 * from the wall clock and resets the node/edge bookkeeping.
 *
 * Cmd_Classify shares its lists with Cmd_Validate, because Load_Object()
 * appends to "proteins.validated" / "proteins.not-validated" for both
 * commands; without the truncation a classify run would append to the lists
 * of an earlier run.  Commands without result lists no longer try to open the
 * output directory itself as a file.
 */
void
Utils::Init() {
	const char *done_list(NULL), *failed_list(NULL);

	if(m_Output_Dir.empty())
		m_Output_Dir = m_Dir;

	switch(m_Curr_Cmd) {
		case Types::Cmd_Learn:
			done_list = "proteins.learned";
			failed_list = "proteins.not-learned";
			break;

		case Types::Cmd_Validate:
		case Types::Cmd_Classify:
			done_list = "proteins.validated";
			failed_list = "proteins.not-validated";
			break;

		case Types::Cmd_Parse:
			done_list = "proteins.parsed";
			failed_list = "proteins.not-parsed";
			break;

		case Types::Cmd_ModelGraphs:
			done_list = "proteins.mg-generated";
			failed_list = "proteins.mg-not-generated";
			break;

		default:
			break;
	}

	if(done_list != NULL && failed_list != NULL) {
		/* opening in the default (truncating) mode empties an existing list */
		std::ofstream o;
		o.open((m_Output_Dir + "/" + done_list).c_str());
		o.close();
		o.open((m_Output_Dir + "/" + failed_list).c_str());
		o.close();
	}

	srand(time(NULL));
	m_Nodes.clear();
	m_Edges.clear();
	m_No_Nds = m_No_Edges = 0;
	m_Min_NodeId = ULLONG_MAX;
	m_Max_NodeId = 0;
}

void
Utils::Set_OutputPerObject() {
	Training::Classifier::Set_OutputPerObject();
}

void
Utils::SetNoLearnEdgeModel(bool w) {
	Training::Classifier::SetNoLearnEdgeModel(w);
}

/* Read_ObjectAnd_rASA - reads the node list of an object together with the
 * rASA value of every node.
 *
 * Lines starting with '#' are skipped.  The first data record holds two
 * unsigned numbers, of which the first is the node count; it is followed by
 * that many "<name><id> <rasa>" records.  Whatever follows the node records
 * is ignored.  The parser state lives in m_ReadStateStruct.
 *
 * For every accepted record the node id, the scaled rASA score (0 when not
 * above Features::RASA_Threshold_ForBeing_labelled_Gt) and the name with the
 * numeric part cut off are stored in m_Nodes, and m_Min_NodeId/m_Max_NodeId
 * are updated.
 *
 * Differences to the earlier version:
 *  - the character read by fgetc() is kept in an int, so EOF can be told
 *    apart from a data byte regardless of the signedness of char;
 *  - the record name is read with a field width matching the buffer;
 *  - a header without a usable, non-zero node count makes the function fail
 *    instead of indexing an empty node vector;
 *  - a record for which nothing could be read is not evaluated with the
 *    contents of the previous record, and a record without a value gets
 *    rASA 0 instead of the value of the previous record.
 *
 * rasa_fn	path of the .asa-graph file
 * returns	false if the file cannot be opened or its header is unusable
 */
bool
Utils::Read_ObjectAnd_rASA(const std::string& rasa_fn) {
	u_int32_t no_read(0);
	size_t dummy(0);
	FILE* f(NULL);
	struct stat buf;
	int ret(0);
	int c(0);
	float rasa(0.);
	char b[64];
	int64_t id(0);
	bool rt(true);
	char *p(NULL);

	memset((void*)b,0,sizeof(b));
	m_No_Nds = m_No_Edges = 0;
	m_Min_NodeId = ULLONG_MAX;
	m_Max_NodeId = 0;

	if(stat(rasa_fn.c_str(),&buf) != 0)
		return false;
	f = fopen(rasa_fn.c_str(),"r");
	if(f == NULL)
		return false;

	while(rt && !feof(f) && (ret != EOF || c != EOF)) {
		c = fgetc(f);
		if(c == '\n' || c == EOF)
			continue;
		if(c == '#') {
			/* comment line */
			NodeFeatureLoader::readline(f);
			continue;
		}
		/* put the first character of the record back */
		fseek (f,-1,SEEK_CUR);
		switch(m_ReadStateStruct) {

			case Readstate_idle:
				ret = fscanf(f,"%lu %lu",&m_No_Nds,&dummy);
#ifdef DEBUG_TRAIN_UTILS_INPUT
				LOG_TRAIN_UTILS_3("%lu %lu",m_No_Nds,dummy)
#endif // DEBUG_TRAIN_UTILS_INPUT
				if(ret >= 1 && m_No_Nds > 0) {
					m_ReadStateStruct = Readstate_nodes;
					m_Nodes.resize(m_No_Nds*2);
					no_read = 0;
				} else if(ret != EOF) {
					/* no node count: nothing can be stored safely */
					rt = false;
				}
				break;

			case Readstate_nodes:
				rasa = 0.;
				ret = fscanf(f,"%63s %f",b,&rasa);
#ifdef DEBUG_TRAIN_UTILS_INPUT
				LOG_TRAIN_UTILS_3("%s %f",b,rasa)
#endif // DEBUG_TRAIN_UTILS_INPUT

				if(ret >= 1) {
					/* additional syntax check confirm file e.g. pdb/a1/pdb3a17.ent-B.asa-graph
					 */
					id = CheckSyntax_AndAssign_NodeId(b);
					if(id > -1) {
						m_Min_NodeId = std::min(m_Min_NodeId,(u_int64_t)id);
						m_Max_NodeId = std::max(m_Max_NodeId,(u_int64_t)id);
						std::vector<GraphGen::Node<Types::Node> >::iterator n_id = find_if(m_Nodes.begin(),m_Nodes.end(),Types::FindNodeId(id));
						if(n_id == m_Nodes.end()) {
							m_Nodes[no_read].m_Id = id;
							rasa *= (float)Features::RASA_CONSERV_Val_COMMON_Scale;
							if(rasa > (float)Features::RASA_Threshold_ForBeing_labelled_Gt)
								m_Nodes[no_read].m_Nd.m_Score_rASA = rasa;
							else
								m_Nodes[no_read].m_Nd.m_Score_rASA = 0.;
							/* keep only the part in front of the id as name */
							p = strpbrk((char*)b,"0123456789");
							if(p != NULL)
								*p = '\0';
							m_Nodes[no_read].m_Nd.m_Aa_Name = std::string(b);
						} else {
							LOG_TRAIN_UTILS_3("ERROR: node already seen %s>%s",rasa_fn.c_str(),b)
						}
					} else {
						LOG_TRAIN_UTILS_3("ERROR: amino acid syntax %s> %s",rasa_fn.c_str(),b)
					}
				}

				/* rejected records use up a slot as well */
				if(++no_read == m_No_Nds) {
					m_ReadStateStruct = Readstate_edges;
					no_read = 0;
				}
				break;

			case Readstate_edges:
				/* the rest of the file is not read here */
				ret = !fseek(f,0,SEEK_END);
				break;
		}
	}
	fclose(f);

	return rt;
}

/* CleanUp - drops the object currently loaded: nodes, edges, chain name and
 * counters are emptied, and both state variables return to idle.
 */
void Utils::CleanUp()
{
	m_Nodes.clear();
	m_Edges.clear();
	m_Chain.clear();

	m_No_Nds = 0;
	m_No_Edges = 0;

	m_LoadObject_State = LoadObjectState_Idle;
	m_ReadStateStruct = Readstate_idle;
}

/* Copies the NUL-terminated string d into m_Dir. */
void Utils::Set_FeaturesDIR(const char* d)
{
	m_Dir.assign(d,strlen(d));
}

/* Copies the NUL-terminated string d into m_Dir. */
void Utils::Set_ValidationDIR(const char* d)
{
	m_Dir.assign(d,strlen(d));
}

/* Copies the NUL-terminated string d into m_Dir. */
void Utils::Set_PredictionDIR(const char* d)
{
	m_Dir.assign(d,strlen(d));
}

/* Copies the NUL-terminated string d into m_Prot_InFile, the list file read by LoadObjects_FromFile(). */
void Utils::SetInputFile_Proteins(const char* d)
{
	m_Prot_InFile.assign(d,strlen(d));
}

/* Copies the NUL-terminated string d into m_RgData_Dir. */
void Utils::Set_RgData_Dir(const char* d)
{
	m_RgData_Dir.assign(d,strlen(d));
}

/* Copies the NUL-terminated string d into m_FeData_Dir. */
void Utils::Set_FeData_Dir(const char* d)
{
	m_FeData_Dir.assign(d,strlen(d));
}

/* Copies the NUL-terminated string d into m_KyLabels_Dir. */
void Utils::Set_KyLabels_Dir(const char* d)
{
	m_KyLabels_Dir.assign(d,strlen(d));
}

/* Copies the NUL-terminated string d into m_Output_Dir and hands the stored
 * directory to Classifier::Set_Output_Dir().
 */
void Utils::Set_DataOutputDIR(const char* d)
{
	m_Output_Dir.assign(d,strlen(d));
	Classifier::Set_Output_Dir(m_Output_Dir);
}

/* LoadObjects_FromFile - processes the objects listed in m_Prot_InFile.
 *
 * Every record of the list consists of four whitespace separated words:
 * sub-directory (relative to m_Dir), object name, primary chain and second
 * chain.  Load_Object() is called for every complete record.
 *
 * Differences to the earlier version: a file that exists but cannot be
 * opened is reported instead of being passed to fscanf() as NULL, the words
 * are read with a field width matching the buffers, and the buffers start
 * out empty so that the debug log never prints uninitialised memory.
 */
void
Utils::LoadObjects_FromFile() {
	char dir[1024] = "", obj[1024] = "", prim_chain[1024] = "", ch2[1024] = "";
	struct stat buf;
	FILE* f(NULL);
	int ret(0);

	if(stat(m_Prot_InFile.c_str(),&buf) == 0)
		f = fopen(m_Prot_InFile.c_str(),"r");
	if(f == NULL) {
		LOG_TRAIN_UTILS_2("%s: File not found.",m_Prot_InFile.c_str())
		return;
	}

	do {
		ret = fscanf(f,"%1023s %1023s %1023s %1023s",dir,obj,prim_chain,ch2);
		LOG_TRAIN_UTILS_5("in>%s %s %s %s",dir,obj,prim_chain,ch2);
		if(ret == 4) {
			const std::string sdir = m_Dir + "/" + dir;
			const std::string o = obj;
			const std::string prim_c = prim_chain;
			const std::string sec_ch = ch2;
			Load_Object(sdir,o,prim_c,sec_ch);
		}
	} while(!feof(f) && ret != EOF);
	fclose(f);
}

/* Load_Object - loads every input file of one object/chain and then runs the
 * current command on it.
 *
 * File names are built as "<obj>-<chain><extension>".  The .asa-graph,
 * .consrv, .epros, .pssm and .dists files are looked up in dir, the .rg file
 * in m_RgData_Dir, the .fe file in m_FeData_Dir (only if that is set), and the
 * labels either as .kylabels in m_KyLabels_Dir (if set) or as .labels in dir;
 * the label file name carries both chains.  Every file read successfully sets
 * its bit in m_LoadObject_State.
 *
 * The command is executed only if structure/rASA, labels and edges (and, for
 * validate/classify, the weight vectors) are available.  In every case one
 * line naming the object is appended to the matching "proteins.*" list in
 * m_Output_Dir.  The loaded object is dropped again before returning.
 *
 * dir		directory holding the files of the object
 * obj		object name
 * prim_c	primary chain, stored in m_Chain
 * sec_ch	second chain, used in the label and output names
 */
void
Utils::Load_Object(const std::string& dir, const std::string& obj, const std::string& prim_c, const std::string& sec_ch) {

	int32_t object_loaded;
	WeightsLoader weights;
	std::vector<double> w_g, w_q;
	std::ofstream o;
	std::string s;
	std::string fn(obj), fn_asa(".asa-graph"), fn_kylabels(".kylabels"), fn_labels(".labels"), fn_consrv(".consrv"), fn_pssm(".pssm"), fn_dists(".dists"), fn_epros(".epros"), fn_rg(".rg"), fn_fe(".fe");

	/* fn becomes the common file name prefix "<obj>-" */
	fn += "-";
	m_PDB_Object = obj;
	m_Chain = prim_c;
	m_LoadObject_State = LoadObjectState_Idle;

	/* nodes and rASA values; this fills m_Nodes, which the feature loaders
	 * below look their nodes up in
	 */
	s = dir + "/" + fn + m_Chain + fn_asa;
	NodeFeatureLoader asa;
	asa.LogFn("asa>",s.c_str());
	if((Read_ObjectAnd_rASA(s)))
		m_LoadObject_State |= LoadObjectState_StructureAndRASA;
#ifdef DEBUG_TRAIN_UTILS
	else {
		LOG_TRAIN_UTILS("no-asa>")
	}
#endif // DEBUG_TRAIN_UTILS

	/* Load in Conserv, EPROS, PSSM, Labels, Dists Files
	 * 									*/
	s = dir + "/" + fn + m_Chain + fn_consrv;
	ConserveLoader conservation;
	if(conservation.Read_Features("consrv>",s.c_str()))
		m_LoadObject_State |= LoadObjectState_Consrv;
#ifdef DEBUG_TRAIN_UTILS
	else {
		LOG_TRAIN_UTILS("no-consrv>")
	}
#endif // DEBUG_TRAIN_UTILS

	/* the .rg file is taken from m_RgData_Dir, not from dir */
	s = m_RgData_Dir + "/" + fn + m_Chain + fn_rg;
	RgLoader rg;
	if(rg.Read_Features("rg>",s.c_str()))
		m_LoadObject_State |= LoadObjectState_Rg;
#ifdef DEBUG_TRAIN_UTILS
	else {
		LOG_TRAIN_UTILS("no-rg>")
	}
#endif // DEBUG_TRAIN_UTILS

	/* the .fe file is only looked for when a directory was configured */
	if(m_FeData_Dir.size() != 0) {
		s = m_FeData_Dir + "/" + fn + m_Chain + fn_fe;
		FeLoader fe;
		if(fe.Read_Features("fe>",s.c_str()))
			m_LoadObject_State |= LoadObjectState_Fe;
#ifdef DEBUG_TRAIN_UTILS
		else {
			LOG_TRAIN_UTILS("no-fe>")
		}
#endif // DEBUG_TRAIN_UTILS
	}

	s = dir + "/" + fn + m_Chain + fn_epros;
	EprosLoader epros;
	if(epros.Read_Features("epros>",s.c_str()))
		m_LoadObject_State |= LoadObjectState_Epros;
#ifdef DEBUG_TRAIN_UTILS
	else {
		LOG_TRAIN_UTILS("no-epros>")
	}
#endif // DEBUG_TRAIN_UTILS

	s = dir + "/" + fn + m_Chain + fn_pssm;
	PssmLoader pssm;
	if(pssm.Read_Features("pssm>",s.c_str()))
		m_LoadObject_State |= LoadObjectState_Pssm;
#ifdef DEBUG_TRAIN_UTILS
	else {
		LOG_TRAIN_UTILS("no-pssm>")
	}
#endif // DEBUG_TRAIN_UTILS

	/* labels: .kylabels from m_KyLabels_Dir if configured, .labels from dir
	 * otherwise; the labels count only if Accept_Object() agrees
	 */
	if(m_KyLabels_Dir.size() != 0) {
		s = m_KyLabels_Dir + "/" + fn + prim_c + sec_ch + fn_kylabels;
		LabellingLoader labels(true);
		if(labels.Read_Features("kylabels>",s.c_str()) && labels.Accept_Object())
			m_LoadObject_State |= LoadObjectState_Labels;

#ifdef DEBUG_TRAIN_UTILS
		else {
			LOG_TRAIN_UTILS("no-kylabels>")
		}
#endif // DEBUG_TRAIN_UTILS
	} else {
		s = dir + "/" + fn + prim_c + sec_ch + fn_labels;
		LabellingLoader labels(true);
		if(labels.Read_Features("labels>",s.c_str()) && labels.Accept_Object())
			m_LoadObject_State |= LoadObjectState_Labels;

#ifdef DEBUG_TRAIN_UTILS
		else {
			LOG_TRAIN_UTILS("no-labels>")
		}
#endif // DEBUG_TRAIN_UTILS
	}

	/* loading edges depending on requested atomic distance;
	 * only attempted when structure/rASA and labels are both present
	 */
	object_loaded = LoadObjectState(LoadObjectState_StructureAndRASA + LoadObjectState_Labels);
	if((m_LoadObject_State & object_loaded) == object_loaded) {
#ifndef DEBUG_ONLY_PSSM_NO_EDGES
		s = dir + "/" + fn + m_Chain + fn_dists;
		if(Loading_DynamicEdges(s))
			m_LoadObject_State |= LoadObjectState_Dists;
#ifdef DEBUG_TRAIN_UTILS
		else {
			LOG_TRAIN_UTILS("no-edges>")
		}
#endif // DEBUG_TRAIN_UTILS
#else
		m_LoadObject_State |= LoadObjectState_Dists;
#endif // DEBUG_ONLY_PSSM_NO_EDGES
	}
	/* from here on the edges are part of the required set */
	object_loaded += LoadObjectState_Dists;
	switch(m_Curr_Cmd) {
		case Types::Cmd_Learn:
			if((m_LoadObject_State & object_loaded) == object_loaded) {
			/* structure has been built up, call classifier or whether from here
			 * 									*/
				s = m_Output_Dir + "/" + fn + m_Chain + sec_ch;
				Training::Classifier::Set_Output_Object(s.c_str());
				if(Training::Classifier::ExecLearning(m_Dir,m_Nodes,m_Edges,m_Nodes_p,m_Min_NodeId,m_Max_NodeId))
					s = m_Output_Dir + "/" + "proteins.learned";
				else
					s = m_Output_Dir + "/" + "proteins.not-learned";
			} else
				s = m_Output_Dir + "/" + "proteins.not-learned";
			/* s now names the result list this object is appended to */
			o.open(s.c_str(),std::ios_base::out|std::ios_base::app);
			o << dir << "/" << fn << m_Chain << sec_ch << std::endl;
			o.close();
			break;

		case Types::Cmd_Validate:
		case Types::Cmd_Classify:
			/* Reading in weight vectors; read once and remembered in
			 * m_Weights_Loaded for the following objects		*/
#ifndef DEBUG_ONLY_PSSM_NO_EDGES
			s = m_Dir + "/learn.vecs";
			if(!m_Weights_Loaded && weights.ReadValues("weights>",s.c_str(),w_q,w_g)) {
				Training::Classifier::SetTuned_Weights(w_g,w_q);
				m_Weights_Loaded = true;
			}
#else
			m_Weights_Loaded = true;
#endif // DEBUG_ONLY_PSSM_NO_EDGES
			if(m_Weights_Loaded)
				m_LoadObject_State |= LoadObjectState_WeightVectors;
			object_loaded |= LoadObjectState_WeightVectors;
			if((m_LoadObject_State & object_loaded) == object_loaded) {
				s = m_Output_Dir + "/" + fn + m_Chain + sec_ch;
				Training::Classifier::Set_Output_Object(s.c_str());
				if(Training::Classifier::ExecValidation(m_Dir,m_Nodes,m_Edges,m_Nodes_p,m_Min_NodeId,m_Max_NodeId))
					s = m_Output_Dir + "/" + "proteins.validated";
				else
					s = m_Output_Dir + "/" + "proteins.not-validated";
			} else
				s = m_Output_Dir + "/" + "proteins.not-validated";
			o.open(s.c_str(),std::ios_base::out|std::ios_base::app);
			o << dir << "/" << fn << m_Chain << sec_ch << std::endl;
			o.close();
			break;

		case Types::Cmd_Parse:
			if((m_LoadObject_State & object_loaded) == object_loaded)
			/* structure has been built up correctly
			 * 							*/
				s = m_Output_Dir + "/" + "proteins.parsed";
			else
				s = m_Output_Dir + "/" + "proteins.not-parsed";
			o.open(s.c_str(),std::ios_base::out|std::ios_base::app);
			o << dir << "/" << fn << m_Chain << sec_ch << std::endl;
			o.close();
			break;

		/* NOTE(review): Cmd_ModelGraphs has no case here and ends up in
		 * default, although Init() creates result lists for it - confirm
		 * that this is intended.
		 */
		case Types::Cmd_Idle:
		default:
			break;

	}
	/* Clean up currently loaded object */
	CleanUp();
}

void
Utils::Load_Reference_Data(std::string& dir) {

	bool got_object(false), stop_search(false), ver(Verbose());
	char b[1024], b1[1024];
	vec v, v1;
	path path(dir), path1(dir);
	std::string fn(m_PDB_Object), fn_asa(".asa-graph");
	fn += "-";
	m_Chain.clear();

	if (exists(path) && is_directory(path)) {
		copy(directory_iterator(path), directory_iterator(), back_inserter(v));
		std::sort(v.begin(), v.end());
		for (vec::const_iterator it(v.begin()); (!got_object || !stop_search) && it != v.end(); ++it) {

			if (is_regular_file(*it) && !(*it).filename().empty()) {

				strcpy(b1,(*it).filename().c_str());

				SetVerbose(true);
				LOG_TRAIN_UTILS_3("  f dir(%s) %s",dir.c_str(),b1)
				SetVerbose(ver);

				const char* p = strstr(b1,fn.c_str());
				if(p != NULL) {

					got_object = true;
					stop_search = false;

				/* Localises the associated reference data	*/
					const char* q = strstr(b1,fn_asa.c_str());
					b[1] = '\0';
					if(q != NULL) {
						strncpy(b,--q,1);
						m_Chain = b;
						LOG_TRAIN_UTILS_2("  c %s",m_Chain.c_str())
#if 0
						Load_Object(dir,m_PDB_Object,m_Chain);
#endif //0
					}
				} else
					stop_search = true;
			}
		}
	}
}

/* LoadObjects_Sequentially	-	walks dir recursively, in sorted order, and
 * 								picks up the PDB objects found in it.
 *
 * For every regular file whose name contains ".ent", the part of the name up
 * to and including ".ent" is taken as object name.  Whenever that name
 * differs from m_PDB_Object it is stored there and Load_Reference_Data() is
 * called for the directory.  Sub-directories are descended into.
 * A boost::filesystem error ends the walk of the directory it occurred in
 * and is only logged.
 *
 * File names are handled as std::string, which removes the fixed 1024 byte
 * buffers the names used to be copied into without a length check.
 *
 * dir	directory to walk
 * 															*/
void
Utils::LoadObjects_Sequentially(std::string dir) {

	vec v;
	path p(dir);
	try {
#ifdef DEBUG_TRAIN_UTILS
		LOG_TRAIN_UTILS_2("  D %s",dir.c_str())
#endif // DEBUG_TRAIN_UTILS
		if (exists(p) && is_directory(p)) {
			copy(directory_iterator(p), directory_iterator(), back_inserter(v));
			std::sort(v.begin(), v.end());
			for (vec::const_iterator it(v.begin()); it != v.end(); ++it) {
				const std::string name((*it).filename().c_str());
				if (is_regular_file(*it) && !name.empty()) {
#ifdef DEBUG_TRAIN_UTILS
					LOG_TRAIN_UTILS_3("  F %s/%s",dir.c_str(),name.c_str())
#endif // DEBUG_TRAIN_UTILS
					const std::string::size_type ent = name.find(".ent");
					if(ent != std::string::npos) {
						/* object name = everything up to and including ".ent" */
						const std::string object(name,0,ent + 4);
						if(m_PDB_Object != object) {
							m_PDB_Object = object;
#ifdef DEBUG_TRAIN_UTILS
							LOG_TRAIN_UTILS_3("\t O dir(%s) %s",dir.c_str(),m_PDB_Object.c_str())
#endif // DEBUG_TRAIN_UTILS
							Load_Reference_Data(dir);
						}
					}
				} else if(is_directory(*it) && !name.empty()) {
					LoadObjects_Sequentially(dir + "/" + name);
				}
			}
		}
	} catch (const filesystem_error& ex) {
#ifdef DEBUG_TRAIN_UTILS
		LOG_TRAIN_UTILS_2("%s",ex.what());
#endif // DEBUG_TRAIN_UTILS
	}
}

/* DoInputSource - selects where the objects come from: the protein list file
 * if one was configured, a walk through the directory tree below m_Dir
 * otherwise.
 */
void Utils::DoInputSource()
{
	if(!m_Prot_InFile.empty()) {
		/* takes proteins from file */
		LoadObjects_FromFile();
		return;
	}
	/* parses the PDB Root DIR	*/
	LoadObjects_Sequentially(m_Dir);
}

/* Run - executes one command.
 *
 * Stores cmd as current command, calls Init() and then processes the input:
 * for learning the input is walked Classifier::GetLearnRounds_PerInput()
 * times (at least once) followed by Classifier::Post_Learning(); for
 * validation and classification it is walked once followed by
 * Classifier::StatsValidation(); for model graphs it is walked once.
 * Cmd_Idle and Cmd_Parse do not walk the input.  Classifier::Purge() is
 * called last for every command.
 *
 * cmd	command to execute
 */
void Utils::Run(Types::Cmd& cmd)
{
	m_Curr_Cmd = cmd;
	Init();

	if(m_Curr_Cmd == Types::Cmd_Learn) {
		int remaining = Classifier::GetLearnRounds_PerInput();
		for(;;) {
			DoInputSource();
			if(--remaining <= 0)
				break;
		}
		Classifier::Post_Learning();
	} else if(m_Curr_Cmd == Types::Cmd_Validate || m_Curr_Cmd == Types::Cmd_Classify) {
		DoInputSource();
		Classifier::StatsValidation();
	} else if(m_Curr_Cmd == Types::Cmd_ModelGraphs) {
		DoInputSource();
	}
	/* Cmd_Idle, Cmd_Parse and unknown commands: nothing to walk */

	Classifier::Purge();
}

/* CheckSyntax_AndAssign_NodeId - extracts the node id from an identifier of
 * the form "<letters><number>...".
 *
 * The id is the number starting at the first decimal digit of the string;
 * every character in front of that digit has to be a letter (there may be
 * none).
 *
 * Differences to the earlier version:
 *  - the number is always read as decimal.  With base 0 an id written with a
 *    leading zero was read as octal ("010" gave 8, "09" gave 0);
 *  - characters are passed to isalpha() as unsigned char;
 *  - a NULL argument is rejected instead of being passed to strpbrk().
 *
 * end		NUL-terminated identifier
 * returns	the id, or -1 if the string has no digit or a non-letter in
 * 			front of the first digit
 */
int64_t
Utils::CheckSyntax_AndAssign_NodeId(const char* end) {
	if(end == NULL)
		return -1;

	const char* digits = strpbrk(end,"0123456789");
	if(digits == NULL)
		return -1;

	for(const char* s = end; s < digits; ++s) {
		if(isalpha((unsigned char)*s) == 0)
			return -1;
	}
	return strtol(digits,NULL,10);
}

/* Reading_DynamicEdge - splits an edge identifier "<node1>;<node2>" into its
 * two node ids.
 *
 * An identifier without ';' is rejected; before, the search result was
 * incremented and used without a check, which crashed on such input.
 *
 * b		NUL-terminated edge identifier
 * id1		receives the id in front of the ';', or -1
 * id2		receives the id after the ';', or -1
 * returns	true if both ids could be extracted
 */
bool
Utils::Reading_DynamicEdge(const char* b, int64_t& id1, int64_t& id2) {
	id1 = id2 = -1;
	if(b == NULL)
		return false;

	const char* q = strchr(b,';');
	if(q == NULL)
		return false;

	id1 = CheckSyntax_AndAssign_NodeId(b);
	id2 = CheckSyntax_AndAssign_NodeId(q + 1);
	return (id1 > -1 && id2 > -1);
}

/* Loading_DynamicEdges - builds the edge list of the current object from a
 * distance file.
 *
 * The file is a sequence of "<node1>;<node2> <distance>" records.  A record
 * becomes an edge if its distance does not exceed m_Opt_DynmEdge_Threshold,
 * the pair is not in the list yet and both nodes are known.  Edges between
 * ids that differ by one are noted in m_Backbone.  Reading stops at the first
 * record whose identifier cannot be parsed.  Afterwards
 * Append_C_alpha_BackBone() adds the missing edges between neighbouring ids.
 *
 * m_Edges, m_Backbone and m_Nodes_p are sized from m_Max_NodeId first, so the
 * node list has to be loaded before this is called.
 *
 * Differences to the earlier version: a node id that is not in m_Nodes now
 * yields a NULL pointer (and no edge) instead of the address one past the
 * last node, and the identifier is read with a field width matching the
 * buffer.
 *
 * fn		path of the .dists file
 * returns	always true, also when the file could not be opened
 */
bool
Utils::Loading_DynamicEdges(const std::string& fn) {
	std::vector<GraphGen::Node<Types::Node> >::iterator n_id;
	GraphGen::Node<Types::Node> *np1(NULL), *np2(NULL);
	std::vector<Types::Edge>::iterator e_end;
	int64_t id1(0), id2(0), pre_id1(-1);
	size_t no_assigned(0);
	float dist(0.);
	static char b[1024];
	bool ret(false);
	FILE* f(NULL);

	m_Edges.clear();
	m_Edges.resize(((m_Max_NodeId+1) * m_Max_NodeId)/2 + 1);
	m_Backbone.resize(m_Max_NodeId+1);
	m_Nodes_p.resize(m_Max_NodeId+1);
	fill(m_Nodes_p.begin(),m_Nodes_p.end(),(GraphGen::Node<Types::Node>*)NULL);
	fill(m_Backbone.begin(),m_Backbone.end(),0);
	NodeFeatureLoader::LogFn("dists>",fn.c_str());
	f = fopen(fn.c_str(),"r");
	if(f != NULL) {
		do {
			ret = false;
			if(!(fscanf(f,"%1023s",b) < 1) && !(fscanf(f,"%f",&dist) < 1)) {
				LOG_TRAIN_UTILS_3("->%s\t%f",b, dist)
				ret = Reading_DynamicEdge(b,id1,id2);
				if(ret && dist <= m_Opt_DynmEdge_Threshold) {
					assert((size_t)id1 < m_Max_NodeId + 1);
					assert((size_t)id2 < m_Max_NodeId + 1);
					/* only the entries assigned so far are searched */
					e_end = m_Edges.begin();
					e_end += no_assigned;
					if(find_if(m_Edges.begin(),e_end,Types::FindEdge(id1,id2)) == e_end) {
						/* np1 is reused while the first id stays the same */
						if(id1 != pre_id1) {
							if(m_Nodes_p[id1] != NULL)
								np1 = m_Nodes_p[id1];
							else {
								n_id = find_if(m_Nodes.begin(),m_Nodes.end(),Types::FindNodeId(id1));
								if(n_id != m_Nodes.end())
									np1 = &(*n_id);
								else
									np1 = NULL;
								m_Nodes_p[id1] = np1;
							}
							pre_id1 = id1;
						}
						if(m_Nodes_p[id2] != NULL)
							np2 = m_Nodes_p[id2];
						else {
							n_id = find_if(m_Nodes.begin(),m_Nodes.end(),Types::FindNodeId(id2));
							if(n_id != m_Nodes.end())
								np2 = &(*n_id);
							else
								np2 = NULL;
							m_Nodes_p[id2] = np2;
						}
						if(np1 != NULL && np2 != NULL) {
							m_Edges[no_assigned].m_Node_1 = np1;
							m_Edges[no_assigned].m_Node_2 = np2;
#ifdef DEBUG_TRAIN_UTILS_INPUT
							LOG_TRAIN_UTILS_6("+>%s%lu;%s%lu\t%f",m_Edges[no_assigned].m_Node_1->m_Nd.m_Aa_Name.c_str(),m_Edges[no_assigned].m_Node_1->m_Id,
																  m_Edges[no_assigned].m_Node_2->m_Nd.m_Aa_Name.c_str(),m_Edges[no_assigned].m_Node_2->m_Id,dist)
#endif // DEBUG_TRAIN_UTILS_INPUT
							no_assigned++;
							assert(no_assigned < m_Edges.size());
							if(abs((signed)id1-(signed)id2) == 1) {
								assert(m_Backbone.size() > (size_t)std::min(id1,id2));
								m_Backbone[std::min(id1,id2)] = 1;
								LOG_TRAIN_UTILS("c-alpha>")
							}
						}
					}
				} else if(!ret) {
					LOG_TRAIN_UTILS_3("ERROR: edge identifier %s> %s",fn.c_str(),b)
				}
			} else {
				if(!feof(f)) {
					LOG_TRAIN_UTILS_3("ERROR: edge identifier %s> %s",fn.c_str(),b)
				}
			}
		} while(ret && !feof(f));
		fclose(f);
	}
	m_No_Edges = no_assigned;
	Append_C_alpha_BackBone();
	ret = true;
	m_ReadStateStruct = Readstate_idle;
#if 0
	for(std::vector<GraphGen::Node<Types::Node>*>::const_iterator o(m_Nodes_p.begin()); o != m_Nodes_p.end(); o++) {
		if((*o) != NULL)
			LOG_TRAIN_UTILS_2("%lu,",(*o)->m_Id)
	}
#endif // 0
	return ret;
}

/* Append_C_alpha_BackBone - adds an edge between every pair of neighbouring
 * node ids n and n+1 within [m_Min_NodeId, m_Max_NodeId] that has no entry in
 * m_Backbone yet, provided both nodes exist.  New edges are stored behind the
 * m_No_Edges entries already in use, and m_No_Edges is advanced.
 *
 * Expects m_Backbone, m_Nodes_p and m_Edges to be sized as done by
 * Loading_DynamicEdges().
 *
 * Differences to the earlier version: the loop bound is written as
 * "n < m_Max_NodeId", because "m_Max_NodeId - 1" wraps around when no node id
 * was recorded (maximum 0), and nothing is written past the end of m_Edges.
 */
void
Utils::Append_C_alpha_BackBone() {
	for(size_t n(m_Min_NodeId); n < m_Max_NodeId; n++) {
		if(m_Backbone[n])
			continue;

		Types::Edge edge;
		if(m_Nodes_p[n] != NULL)
			edge.m_Node_1 = m_Nodes_p[n];
		else {
			std::vector<GraphGen::Node<Types::Node> >::iterator it_nd = find_if(m_Nodes.begin(),m_Nodes.end(),Types::FindNodeId(n));
			if(it_nd != m_Nodes.end())
				m_Nodes_p[n] = edge.m_Node_1 = &(*it_nd);
		}
		if(m_Nodes_p[n+1] != NULL)
			edge.m_Node_2 = m_Nodes_p[n+1];
		else {
			std::vector<GraphGen::Node<Types::Node> >::iterator it_nd = find_if(m_Nodes.begin(),m_Nodes.end(),Types::FindNodeId(n+1));
			if(it_nd != m_Nodes.end())
				m_Nodes_p[n+1] = edge.m_Node_2 = &(*it_nd);
		}
		/* NOTE(review): relies on a default constructed Types::Edge having
		 * NULL node pointers - confirm against its declaration.
		 */
		if(edge.m_Node_1 != NULL && edge.m_Node_2 != NULL ) {
			if(m_No_Edges >= m_Edges.size())
				break;	/* no room left in the pre-sized edge list */
			m_Edges[m_No_Edges] = edge;
#ifdef DEBUG_TRAIN_UTILS_INPUT
			LOG_TRAIN_UTILS_5("ca-add>%s%lu;%s%lu",m_Edges[m_No_Edges].m_Node_1->m_Nd.m_Aa_Name.c_str(),m_Edges[m_No_Edges].m_Node_1->m_Id,
											  m_Edges[m_No_Edges].m_Node_2->m_Nd.m_Aa_Name.c_str(),m_Edges[m_No_Edges].m_Node_2->m_Id)
#endif // DEBUG_TRAIN_UTILS_INPUT
			m_No_Edges++;
		}
	}
}

/* Remembers whether Read_Features() shall skip the first line of a file, and
 * zeroes the token buffer m_B.
 */
NodeFeatureLoader::NodeFeatureLoader(const bool forget_first_line)
	: m_forget_first_line(forget_first_line)
{
	memset(m_B,0,sizeof(m_B));
}

/* LogFn - writes "<log> <fn>" to the debug log; does nothing unless
 * DEBUG_TRAIN_UTILS is defined.
 */
void NodeFeatureLoader::LogFn(const char* log, const char* fn)
{
#ifdef DEBUG_TRAIN_UTILS
	LOG_TRAIN_UTILS_3("%s %s",log,fn)
#endif // DEBUG_TRAIN_UTILS
}

/* Nothing to release. */
NodeFeatureLoader::~NodeFeatureLoader()
{
}

/* readline - consumes the rest of the current line of f, including the
 * terminating newline.  With DEBUG_TRAIN_UTILS_INPUT defined the text is
 * written to the debug log, wrapped in the escape sequences "\033[32m" and
 * "\033[0m".
 *
 * Differences to the earlier version:
 *  - lines longer than the local buffer are still consumed completely, but
 *    only the part that fits is kept; before, they overran the buffer;
 *  - the leading escape sequence is kept; before, its first byte was
 *    overwritten by the first character of the line;
 *  - a read error ends the loop like end-of-file does.
 *
 * f		stream positioned inside the line to skip
 * returns	true if a newline was found, false if the input ended first
 */
bool
NodeFeatureLoader::readline(FILE* f) {
	static const char colour_on[] = "\033[32m";
	static const char colour_off[] = "\033[0m";
	char buf[1024];
	size_t len(sizeof(colour_on) - 1);
	/* leave room for the closing sequence including its NUL */
	const size_t limit(sizeof(buf) - sizeof(colour_off));
	int ch(0);

	memcpy(buf,colour_on,len);
	while((ch = fgetc(f)) != EOF && ch != '\n') {
		if(len < limit)
			buf[len++] = (char)ch;
	}
	memcpy(buf + len,colour_off,sizeof(colour_off));
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_2("%s",buf);
#endif //DEBUG_TRAIN_UTILS_INPUT

	return (ch != EOF);
}

/* Read_Features - reads a per-node feature file and assigns every value to
 * its node.
 *
 * Lines starting with '#' are skipped, as is the first line if the loader
 * was constructed that way.  Every other record is read by Read_Value(),
 * which leaves the node identifier in m_B; the node with the id taken from
 * that identifier is looked up in Utils::m_Nodes and passed to
 * Assign_Feature().  A record whose node is not loaded is only logged; an
 * identifier with wrong syntax ends the reading with failure.
 *
 * The character returned by fgetc() is now kept in an int, so that EOF is
 * recognised independently of the signedness of char and cannot be confused
 * with a data byte.
 *
 * log		tag written to the debug log together with the file name
 * filename	path of the feature file
 * returns	false if the file cannot be opened, the first line could not be
 * 			skipped, or an identifier has wrong syntax
 */
bool
NodeFeatureLoader::Read_Features(const char* log, const char* filename) {

	FILE* f(NULL);
	struct stat buf;
	int ret(0);
	int c(0);
	int64_t id(-1);
	bool retval(true);

	LogFn(log,filename);
	if(stat(filename,&buf) != 0)
		return false;
	f = fopen(filename,"r");
	if(f == NULL)
		return false;

	if(m_forget_first_line)
		retval = readline(f);
	while(retval && !feof(f) && (ret != EOF || c != EOF)) {
		c = fgetc(f);
		if(c == '\n' || c == EOF)
			continue;
		if(c == '#') {
			readline(f);
			continue;
		}
		/* put the first character of the record back */
		fseek (f,-1,SEEK_CUR);
		if((ret = Read_Value(f)) != EOF) {
			id = Utils::CheckSyntax_AndAssign_NodeId(m_B);
			if(id > -1) {
				std::vector<GraphGen::Node<Types::Node> >::iterator n_id = find_if(Utils::m_Nodes.begin(),Utils::m_Nodes.end(),Types::FindNodeId(id));
				if(n_id != Utils::m_Nodes.end()) {
					Assign_Feature(*n_id);
				} else {
					LOG_TRAIN_UTILS_3("ERROR: could not assign feature value %s> %s",filename,m_B)
				}
			} else {
				/* hard syntax error in the node identifier
				 */
				retval = false;
				LOG_TRAIN_UTILS_3("ERROR: syntax error while reading features %s> %s",filename,m_B)
			}
		}
	}
	fclose(f);

	return retval;
}

/* Reads one "<identifier> <label>" record into m_B and m_Label.
 * Returns the result of fscanf().
 */
int LabellingLoader::Read_Value(FILE* f)
{
	const int no_fields = fscanf(f,"%s %d",m_B,&m_Label);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_3("->%s\t%d",m_B,m_Label)
#endif // DEBUG_TRAIN_UTILS_INPUT
	return no_fields;
}

/* Adds the label read last to the reference label of node and to the running
 * sum m_TP_ref.
 *
 * CHANGED-2012-02-20: unless DBG_DISHONEST_CASINO is defined, a node whose
 * rASA score is not above Features::RASA_Threshold_ForBeing_NonIF_scaled is
 * forced to label 0; such amino acids are considered to be in the complex
 * interior.
 */
void LabellingLoader::Assign_Feature(GraphGen::Node<Types::Node>& node)
{
#ifndef DBG_DISHONEST_CASINO
	if(node.m_Nd.m_Score_rASA <= (float)Features::RASA_Threshold_ForBeing_NonIF_scaled)
		m_Label = 0;
#endif // DBG_DISHONEST_CASINO
	m_TP_ref += m_Label;

	/* label 1 sets the interface bit, every other label the non-interface bit */
	const int flag = (m_Label == 1 ? Types::Node::RefLabel_If : Types::Node::RefLabel_NoIf);
	node.m_Nd.m_RefLabel = (Types::Node::RefLabel)(node.m_Nd.m_RefLabel | flag);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_4("+>%s%lu\t%d",node.m_Nd.m_Aa_Name.c_str(),node.m_Id,(node.m_Nd.m_RefLabel == Types::Node::RefLabel_NoIf ? 0 : 1))
#endif // DEBUG_TRAIN_UTILS_INPUT
}

/* True if the sum of the assigned labels reaches LabellingLoader_Accept_Object. */
bool LabellingLoader::Accept_Object() const
{
	return !(m_TP_ref < LabellingLoader_Accept_Object);
}

/* Reads one "<identifier> <value>" record into m_B and m_ConsVal.
 * Returns the result of fscanf().
 */
int ConserveLoader::Read_Value(FILE* f)
{
	const int no_fields = fscanf(f,"%s %f",m_B,&m_ConsVal);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_3("->%s\t%f",m_B,m_ConsVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
	return no_fields;
}

/* Scales the value read last by Features::RASA_CONSERV_Val_COMMON_Scale and
 * stores it as conservation score of node, limited from above by
 * Features::CONSERV_Threshold_TakenInto_Account_Le.
 */
void ConserveLoader::Assign_Feature(GraphGen::Node<Types::Node>& node)
{
	const float cap = (float)Features::CONSERV_Threshold_TakenInto_Account_Le;

	m_ConsVal *= (float)Features::RASA_CONSERV_Val_COMMON_Scale;
	node.m_Nd.m_Score_Conserv = (m_ConsVal < cap ? m_ConsVal : cap);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_4("+>%s%lu\t%f",node.m_Nd.m_Aa_Name.c_str(),node.m_Id,m_ConsVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
}

/* Reads one "<identifier> <value>" record into m_B and m_EprosVal.
 * Returns the result of fscanf().
 */
int EprosLoader::Read_Value(FILE* f)
{
	const int no_fields = fscanf(f,"%s %f",m_B,&m_EprosVal);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_3("->%s\t%f",m_B,m_EprosVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
	return no_fields;
}

/* Stores the value read last as m_Score_Epros of node. */
void EprosLoader::Assign_Feature(GraphGen::Node<Types::Node>& node)
{
	node.m_Nd.m_Score_Epros = m_EprosVal;
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_4("+>%s%lu\t%f",node.m_Nd.m_Aa_Name.c_str(),node.m_Id,m_EprosVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
}

/* Reads one "<identifier> <value>" record into m_B and m_RgVal.
 * Returns the result of fscanf().
 */
int RgLoader::Read_Value(FILE* f)
{
	const int no_fields = fscanf(f,"%s %f",m_B,&m_RgVal);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_3("->%s\t%f",m_B,m_RgVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
	return no_fields;
}

/* Stores the value read last as m_Score_Rg of node. */
void RgLoader::Assign_Feature(GraphGen::Node<Types::Node>& node)
{
	node.m_Nd.m_Score_Rg = m_RgVal;
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_4("+>%s%lu\t%f",node.m_Nd.m_Aa_Name.c_str(),node.m_Id,m_RgVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
}

/* Reads one "<identifier> <value> <value>" record into m_B, m_Pssm_If and
 * m_Pssm_NoIf.  Returns the result of fscanf().
 */
int PssmLoader::Read_Value(FILE* f)
{
	const int no_fields = fscanf(f,"%s %f %f",m_B,&m_Pssm_If, &m_Pssm_NoIf);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_4("->%s\t%f\t%f",m_B,m_Pssm_If,m_Pssm_NoIf)
#endif // DEBUG_TRAIN_UTILS_INPUT
	return no_fields;
}

/* Stores the value read last as m_Score_Fe of node. */
void FeLoader::Assign_Feature(GraphGen::Node<Types::Node>& node)
{
	node.m_Nd.m_Score_Fe = m_FeVal;
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_4("+>%s%lu\t%f",node.m_Nd.m_Aa_Name.c_str(),node.m_Id,m_FeVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
}

/* Reads one "<identifier> <value> <integer>" record; the identifier goes to
 * m_B, the value to m_FeVal, and the trailing integer is discarded.
 * Returns the result of fscanf().
 */
int FeLoader::Read_Value(FILE* f)
{
	int ignored(0);
	const int no_fields = fscanf(f,"%s %f %d",m_B,&m_FeVal, &ignored);
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_3("->%s\t%f",m_B,m_FeVal)
#endif // DEBUG_TRAIN_UTILS_INPUT
	return no_fields;
}

/* Stores the two values read last in the ScorePssm_If and ScorePssm_NoIf
 * slots of the PSSM score of node.
 */
void PssmLoader::Assign_Feature(GraphGen::Node<Types::Node>& node)
{
	node.m_Nd.m_Score_PSSM[Types::Node::ScorePssm_If] = m_Pssm_If;
	node.m_Nd.m_Score_PSSM[Types::Node::ScorePssm_NoIf] = m_Pssm_NoIf;
#ifdef DEBUG_TRAIN_UTILS_INPUT
	LOG_TRAIN_UTILS_5("+>%s%lu\t%f\t%f",node.m_Nd.m_Aa_Name.c_str(),node.m_Id,m_Pssm_If,m_Pssm_NoIf)
#endif // DEBUG_TRAIN_UTILS_INPUT
}

/* Read_PssmBlockAnd_Assign - reads the next token from m_Fh and stores its
 * two numbers for residue m_Curr_Res of slot m_Slot in m_Node.
 *
 * As parsed here, a token consists of a number, a comma, one letter, one
 * further character and a second number.  The first number goes to
 * m_Pssm_OrgAbs, the second to m_Pssm_Org.
 *
 * Differences to the earlier version:
 *  - the pointer is advanced outside of assert(); with NDEBUG the increment
 *    inside the assertion vanished and the second number was parsed from the
 *    wrong position;
 *  - end of input is reported as failure instead of success;
 *  - a token that ends right after the comma or after the letter is reported
 *    as failure instead of being read past its end.
 *
 * returns	false if no token could be read or the token is malformed after
 * 			the comma; true otherwise.  A token without comma yields true
 * 			without anything being stored (unchanged behaviour).
 */
bool
PssmLoaderV2::Read_PssmBlockAnd_Assign() {
	if(fscanf(m_Fh,"%s",m_B) != 1)
		return false;

	char* what = strpbrk((char*)m_B,",");
	if(what == NULL)
		return true;

	const double v1 = strtod((char*)m_B,NULL);

	/* the character behind the comma has to be a letter */
	++what;
	const bool is_letter = (isalpha((unsigned char)*what) != 0);
	assert(is_letter);
	if(!is_letter || what[1] == '\0')
		return false;

	/* skip the letter and the character following it */
	what += 2;
	const double v2 = strtod(what,NULL);

	m_Node.m_Nd.m_Pssm_OrgAbs[m_Slot * Types::Node::Pssm_Org_CntRes + m_Curr_Res] = v1;
	m_Node.m_Nd.m_Pssm_Org[m_Slot * Types::Node::Pssm_Org_CntRes + m_Curr_Res] = v2;
	return true;
}

/* Reads the next token into m_B and remembers the stream in m_Fh, from which
 * Assign_Feature() reads the rest of the record.
 * Returns the result of fscanf().
 */
int PssmLoaderV2::Read_Value(FILE* f)
{
	const int no_fields = fscanf(f,"%s",m_B);
	m_Fh = f;
	return no_fields;
}

/* Assign_Feature - reads the PSSM blocks of one node from m_Fh and stores
 * them in n.
 *
 * Works on the copy m_Node of n: both PSSM vectors are sized to
 * Pssm_Org_Slots * Pssm_Org_CntRes and zeroed, then for every slot the slot
 * number behind the comma of the token in m_B is compared with the expected
 * one and, if it matches, Pssm_Org_CntRes values are read with
 * Read_PssmBlockAnd_Assign().  The copy is written back to n at the end.
 * On entry m_B holds the token read by Read_Value().
 */
void
PssmLoaderV2::Assign_Feature(GraphGen::Node<Types::Node>& n) {
	int ret(0);
	char *what(NULL);
	std::vector<GraphGen::Node<Types::Node> >::iterator n_id;

	m_Node = n;
	m_Node.m_Nd.m_Pssm_Org.resize(Types::Node::Pssm_Org_Slots * Types::Node::Pssm_Org_CntRes);
	m_Node.m_Nd.m_Pssm_OrgAbs.resize(Types::Node::Pssm_Org_Slots * Types::Node::Pssm_Org_CntRes);
	fill(m_Node.m_Nd.m_Pssm_Org.begin(),m_Node.m_Nd.m_Pssm_Org.end(),0.);
	fill(m_Node.m_Nd.m_Pssm_OrgAbs.begin(),m_Node.m_Nd.m_Pssm_OrgAbs.end(),0.);
	for(int slot(0); ret >= 0 && slot < Types::Node::Pssm_Org_Slots; slot++) {
		/* the slot number follows the comma of the slot header token */
		what = strpbrk((char*)m_B,",");
		if(what != NULL) {
			m_Slot = strtol(++what,NULL,0);
			/* slots have to appear in ascending order, starting at 0 */
			ret = (m_Slot == slot ? 0 : -1);
			m_Curr_Res = 0;
			for(; ret >= 0 && m_Curr_Res < Types::Node::Pssm_Org_CntRes; m_Curr_Res++)
				ret = (Read_PssmBlockAnd_Assign() ? 0 : -1);
		}
		/* fetch the header token of the next slot; this overwrites ret, so
		 * a failure above only ends the loop when it happened in the last slot
		 * or when this read fails as well
		 */
		if(slot < Types::Node::Pssm_Org_Slots - 1)
			ret = fscanf(m_Fh,"%s",m_B);
	}
	n = m_Node;
#ifdef DEBUG_TRAIN_UTILS_INPUT
	if(Utils::MoreVerbose()) {
		for(int slot(0); slot < Types::Node::Pssm_Org_Slots; slot++) {
			LOG_TRAIN_UTILS_4("%s%lu,%d",n.m_Nd.m_Aa_Name.c_str(),n.m_Id,slot)
			for(int res(0); res < Types::Node::Pssm_Org_CntRes; res++) {
				LOG_TRAIN_UTILS_3("%.0f,%.4f",m_Node.m_Nd.m_Pssm_OrgAbs[slot * Types::Node::Pssm_Org_CntRes + res],m_Node.m_Nd.m_Pssm_Org[slot * Types::Node::Pssm_Org_CntRes + res])
			}
		}
	}
#endif // DEBUG_TRAIN_UTILS_INPUT
}

/* Nothing to release. */
WeightsLoader::~WeightsLoader()
{
}

/* ReadPortion - reads up to no_w numbers from f and appends them to weights.
 *
 * Lines starting with '#' are skipped.  Every read attempt counts, whether a
 * number could be parsed or not, so at most no_w attempts are made.
 *
 * The character returned by fgetc() is now kept in an int, so that EOF is
 * recognised independently of the signedness of char, and the failure log is
 * enclosed in braces so that the condition covers the whole macro.
 *
 * filename	name of the file, used for log messages only
 * f		stream to read from
 * weights	vector the numbers are appended to
 * no_w		number of values expected
 * returns	true if weights holds exactly no_w elements afterwards
 */
bool
WeightsLoader::ReadPortion(const char* filename, FILE* f, std::vector<double>& weights, const size_t no_w) {
	int ret(0);
	int c(0);
	float w(0.);
	size_t no_val(no_w);
	if(Utils::MoreVerbose()) {
		LOG_TRAIN_UTILS_2("%lu",no_w)
	}
	while(no_val > 0 && !feof(f) && (ret != EOF || c != EOF)) {
		c = fgetc(f);
		if(c == '\n' || c == EOF)
			continue;
		if(c == '#') {
			NodeFeatureLoader::readline(f);
			continue;
		}
		/* put the first character of the number back */
		fseek (f,-1,SEEK_CUR);
		ret = fscanf(f,"%f",&w);
		no_val--;
		if(ret == 1) {
			weights.push_back(w);
			if(Utils::MoreVerbose()) {
				LOG_TRAIN_UTILS_2("%f",w)
			}
		} else if(ret != EOF) {
			LOG_TRAIN_UTILS_2("reading failure %s",filename)
		}
	}

	return (weights.size() == no_w);
}

/* ReadValues - reads the two weight vectors from a file.
 *
 * The file may start with one '#' line.  It then holds the element count of
 * the first vector followed by its values, and the element count of the
 * second vector followed by its values.
 *
 * Both output vectors are emptied before reading.  Before, only w_q was
 * emptied, so a w_g that was not empty on entry kept its old elements and
 * made the final size check fail.
 *
 * log		tag written to the debug log together with the file name
 * filename	path of the weights file
 * w_q		receives the first vector
 * w_g		receives the second vector
 * returns	true if both vectors were read completely and are not empty
 */
bool
WeightsLoader::ReadValues(const char* log, const char* filename, std::vector<double>& w_q, std::vector<double>& w_g) {
	FILE* f(NULL);
	struct stat buf;
	int ret(0);
	bool rt(false);
	size_t no_w_q(0), no_w_g(0);

	if(stat(filename,&buf) == 0) {
		NodeFeatureLoader::LogFn(log,filename);
		f = fopen(filename,"r");
		if(f != NULL) {
			/* an optional leading comment line */
			if(fgetc(f) == '#')
				rt = NodeFeatureLoader::readline(f);
			else {
				rt = true;
				fseek (f,-1,SEEK_CUR);
			}
			if(rt)
				ret = fscanf(f,"%lu",&no_w_q);
			if(ret == 1) {
				w_q.clear();
				w_g.clear();
				rt = ReadPortion(filename,f,w_q,no_w_q);
				ret = fscanf(f,"%lu",&no_w_g);
				if(!rt || ret != 1 || !ReadPortion(filename,f,w_g,no_w_g)) {
					LOG_TRAIN_UTILS_2("reading failure %s",filename)
				}
			} else {
				LOG_TRAIN_UTILS_2("reading failure %s",filename)
			}
			fclose(f);
		} else {
			LOG_TRAIN_UTILS_2("%s: File not found.",filename)
		}
	}

	return (rt && no_w_g != 0 && no_w_q != 0 && w_g.size() == no_w_g && w_q.size() == no_w_q);
}

} // Training
