// Made by Peter A Noble November 2020
// panoble2017@gmail.com

#include<iostream>
#include<string>
#include<fstream>
#include<sstream>
#include<cstring>
#include <math.h>

using namespace std;

// g++ ANOVA.cpp -o ANOVA
// ./ANOVA 3 8 test.txt anova_out.txt 
// 3 is the number of groups
// 8 is the number of replicates
// test.txt is the input file
// anova_out.txt is formated to drop into an Excel file containing multiple samples

// purpose is to determine the F statistic and SNK tests for thousands of individual samples.

int main(int argc, char*argv[]) {

ifstream in(argv[3]); 
ofstream out(argv[4]); 	
string sentence;
int num=100000;
int standard=1000;

char** word = new char*[num];
for (int s = 0; s < num; s++)
	{
	word[s] = new char[standard];
	}

char * pos2;
int h=0;
int q1=0;
int s=0;
int t=0;
int groups=atoi(argv[1]);  //3
int reps=atoi(argv[2]);  //7
double column[num];
double col[groups][standard];
double col2[groups][standard];  //new
double squares[groups][standard];
double sums[standard];
double count_sums;
double count_sums2;
double count_means[standard];
double sum_sum_x_squared_div_nt;
double sum_sum_x_squared_div_nt2;
double sums2[standard];
double sum_square_x_div_n[groups];
double SSb;
double SSw;
double MSb;
double MSw;
double F;
double count2=0;
double count3=0;
double q=0,d=0, snk=0;

//double critical_value=2.85;
double critical_value=3.47; //4.93 critical_value=2.85; 

int count_zeros=0;

//double var[groups][4];
double sum_of_x_s=0;
double sum_sum_x=0;
double sum_sum_x_squared_div_n=0;
int dfb=0;
int dfw=0;
int nt=0;
int count=0;
double count_squares;

while(!in.eof())  // read in the matrix
    {
	getline(in,sentence); 
	//cout << sentence << "\n"; //exit(1); 
	h=0;
	char * char_array = new char [sentence.length()+ 1];	
	strcpy (char_array, sentence.c_str());
	char delimiters[2];
	strcpy(delimiters,"\t");
    pos2 = strtok (char_array, delimiters);
	
  	while (pos2 != NULL)
   		{
   		word[h]=(pos2);
   		pos2 = strtok (NULL, delimiters);
		h=h+1;
  		}
//	cout << "h=" << h << "\n" << flush; //exit(1); 
	for (int s=0; s<(h);s++)  
		{
		column[s]=atof(word[s]);
		//cout << column[s] << "\t";	
		}
		//cout << "\n" << flush; exit(1); 

// reset vars to zero
count_sums=0;
count_sums2=0.0;
sum_sum_x_squared_div_nt=0.0;;
sum_sum_x_squared_div_nt2=0.0;;
SSb=0.0;;
SSw=0.0;;
MSb=0.0;;
MSw=0.0;;
F=0.0;;
count2=0;
count3=0;

sum_of_x_s=0.0;
sum_sum_x=0.0;
sum_sum_x_squared_div_n=0.0;
dfb=0;
dfw=0;
nt=0;
count=0;
count_squares=0.0;
q1=0;
dfb=0;
nt=0;
dfw=0;
s=0;
t=0;
q=0;
d=0;
snk=0;
//

for (int s=0; s<(groups);s++)   
	{
	for (int t=0; t<(reps);t++)
		{
		col[s][t]=column[count];
		count=count+1;
		}
	}	

for (int s=0; s<(groups);s++)   
	{
	count_zeros=0;
	for (int t=0; t<(reps);t++)
		{
		if (col[s][t]==0) {count_zeros=count_zeros+1;}
		}
	if (count_zeros!=reps)
		{
		for (int t=0; t<(reps);t++)
			{
			col2[q1][t]=col[s][t];
			}
			q1=q1+1;
		}	
	}	

for (int s=0; s<(q1);s++)   
	{
	for (int t=0; t<(reps);t++)
		{
		cout << col2[s][t] << "\t";
		}
	cout << "\n";
	}	
cout << "\n";
	
	for (int s=0; s<(q1);s++)   
	    {		
		//count=0;
		count_squares=0.0;  
		for (int t=0; t<(reps);t++)
			{   
			squares[s][t]=col2[s][t]*col2[s][t];
			count_squares=squares[s][t]+count_squares;
			}
		}
dfb=groups-1;
nt=groups*reps;
dfw=(nt)-(groups); 

double sum_squares=0;
//cout << "reps=\t" << reps << "\n" << flush; exit(1);

for (int s=0; s<(q1);s++)   
    {	
    count=0; count2=0; count3=0; 
 	for (int t=0; t<(reps);t++)   
   		{	
		count3=col2[s][t]+count3;
		count2=squares[s][t]+count2;
		}
	sums[s]=count3;
	sums2[s]=count2;
	}	

// print out array
	for (int t=0; t<(reps);t++)
		{   
		for (int s=0; s<(q1);s++)   
  	  		{		
			cout <<	col2[s][t] << "\t" << squares[s][t] << "\t";
			}
		cout << "\n"; 
		}
		cout << "\n"; 
	
		cout << "sums[]\t";
		for (int s=0; s<(q1);s++)   
  	  		{		
			cout <<	sums[s] << "\t";
			count_means[s]=double(double(sums[s])/reps);  //new
			count_sums=sums[s]+count_sums;
			}
		cout << count_sums << "\n" << flush; 

//new
		cout << "means[]\t";

		for (int s=0; s<(q1);s++)   
  	  		{		
			cout <<	count_means[s] << "\t";
			}
		cout << "\n" << flush; 
//new

		cout << "sum2[]\t";
		for (int s=0; s<(q1);s++)   
  	  		{		
			cout <<	sums2[s] << "\t";
			count_sums2=sums2[s]+count_sums2;
			}
		cout << count_sums2 << "\n" << flush; 

	sum_sum_x_squared_div_nt=double(count_sums*count_sums)/nt;
	cout << "sum_sum_x_squared_div_nt=" << sum_sum_x_squared_div_nt << "\n";


	cout << "dfb=" << dfb << "\n";
	cout << "nt=" << nt << "\n";
	cout << "dfw=" << dfw << "\n"<< flush;;
	
	cout << "sum_square_x_div_n[s]\t";
	
	for (int s=0; s<(q1);s++)   
  	  		{	
  	  		sum_square_x_div_n[s]=double(sums[s]*sums[s])/reps;
			
			cout << sum_square_x_div_n[s] << "\t";
			sum_sum_x_squared_div_nt2=sum_square_x_div_n[s]+sum_sum_x_squared_div_nt2;
			}
		cout << "\nsum_sum_x_squared_div_nt2\t" << sum_sum_x_squared_div_nt2;
	
		cout << "\n" << flush; 
		SSb=sum_sum_x_squared_div_nt2-sum_sum_x_squared_div_nt;
		cout << "SSb=\t" << SSb << "\n";
		SSw=count_sums2-sum_sum_x_squared_div_nt2;
		cout << "SSw=\t" << SSw << "\n";

		MSb=double(SSb)/dfb;
		cout << "MSb=\t" << MSb << "\n";

		MSw=double(SSw)/dfw;
		cout << "MSw=\t" << MSw << "\n";
		F=double(MSb)/MSw;
		cout << "F=\t" << F << "\n";
		out << F << "\t";
		
	cout <<  "SNK tests\n";  // critical value is 2.85
		for (int s=0; s<(q1);s++)   //groups =>q1
  	  		{	
			for (int t=s+1; t<(q1);t++)    //groups =>q1
  	  			{	
  	  			q = abs(count_means[s]-count_means[t]);
  	  			d = sqrt(double(MSw/reps));
  	  			snk=double(double(q)/d);
  	  			cout << s+1 << "\t" << t+1  << "\t" << q << "\t" << d << "\t" << snk  << "\n";
  	  			if (snk>=critical_value)
  	  				{
  	  				//out << s+1 << "\t" << t+1  << "\t" << q << "\t" << d << "\t" << snk  << "\n";
  	  				out << s+1 << "\t" << t+1  << "\t";
  	  				out << count_means[s] << "\t" << count_means[t] << "\t";
					}
				}
			}
	out << "\n";
	}	
return 0;
}
