/*
#ifndef __OPENCL_VERSION__ //This should fail during an actual openCL compile, used only to trick Eclipse into syntax highlighting this file as "C" code.
#define __kernel
#define __global
#define kernel
#define global
#define constant
#define local
#define float2 float
#define int3 int
#define float3 float
#define float4 float
#define uchar4 char
#endif
*/

// Positions of the circle parameters inside the 3-element parameter arrays
// used by the fitting functions: centre abscissa, centre ordinate, radius.
#define circleParamIndexX 0
#define circleParamIndexY 1
#define circleParamIndexR 2

// NOTE(review): CIRCLE_DETECTION_GRID_SIZE is not referenced in this part of the
// file; presumably the side length (in cells) of the topology grid - confirm
// against the caller.
#define CIRCLE_DETECTION_GRID_SIZE 101
// Layout of one result record written by detectCirclesInSlice:
//   CIRCLE_DETECTION_RESULT_HEADER_SIZE header values, followed by
//   CIRCLE_DETECTION_NB_RESULTS_MAX circle entries of
//   CIRCLE_DETECTION_RESULT_UNIT_SIZE values each (2 + 3*9 = 29 values).
#define CIRCLE_DETECTION_NB_RESULTS_MAX 3
#define CIRCLE_DETECTION_RESULT_UNIT_SIZE 9
#define CIRCLE_DETECTION_RESULT_HEADER_SIZE 2
#define CIRCLE_DETECTION_RESULT_SIZE 29
// Offsets inside the record header: number of forms found in the grid and
// number of circles that passed the score and radius tests.
#define RESULT_INDEX_HEADER_NBFORMS 0
#define RESULT_INDEX_HEADER_NBCIRCLES 1
// Offsets inside one circle entry: centre in grid cells (CU, CV), slice offset
// (CN), centre returned by getGlobalX/Y/Z (CX, CY, CZ), radius and score scaled
// by the cell size, and number of grid cells belonging to the form.
#define RESULT_INDEX_CU 0
#define RESULT_INDEX_CV 1
#define RESULT_INDEX_CN 2
#define RESULT_INDEX_CX 3
#define RESULT_INDEX_CY 4
#define RESULT_INDEX_CZ 5
#define RESULT_INDEX_R 6
#define RESULT_INDEX_SCORE 7
#define RESULT_INDEX_NBCELLS 8

// Bounds on the number of grid cells a form may have to be considered for a
// circle fit (passed to doCircleLevenbergMarquardt by detectCirclesInSlice).
#define CIRCLE_DETECTION_NB_POINTS_MIN 10
#define CIRCLE_DETECTION_NB_POINTS_MAX 400


// Levenberg-Marquardt tuning:
//   dLambdaInitial     starting value of the damping factor lambda
//   thresholdDeltaF    stop when |newF - F| / newF falls below this value
//   lambdaScaleFactor  factor by which lambda is multiplied / divided
#define dLambdaInitial 0.001
#define thresholdDeltaF 0.001
#define lambdaScaleFactor 10
//Maximum number of iterations of the Levenberg-Marquardt loop.
#define ITER_MAX 20

// Partial derivative, with respect to the centre abscissa cx, of the distance
// between the point (x, y) and the circle centre.
// D is that distance, already computed by the caller; y, cy and r are not used.
// D is not checked against zero.
float deriveeXCercleFast(float x, float y, float cx, float cy, float r, float D)
{
	const float ecartX = cx - x;
	return ecartX / D;
}
// Partial derivative, with respect to the centre ordinate cy, of the distance
// between the point (x, y) and the circle centre.
// D is that distance, already computed by the caller; x, cx and r are not used.
// D is not checked against zero.
float deriveeYCercleFast(float x, float y, float cx, float cy, float r, float D)
{
	const float ecartY = cy - y;
	return ecartY / D;
}
// Partial derivative of the signed distance (D - r) with respect to the
// radius r: a constant -1, whatever the arguments.
float deriveeRCercle(float x, float y, float cx, float cy, float r)
{
	const float derivee = -1.0f;
	return derivee;
}
// Signed distance from a point to the circle: D minus the radius stored in
// parametres, where D is the point-to-centre distance supplied by the caller.
// x and y are not used.
float distanceSigneeFastCircle(float x, float y, const float *parametres, float D) 
{
	const float rayon = parametres[circleParamIndexR];
	return D - rayon;
}
// Euclidean distance between the point (x, y) and the circle centre stored in
// parametres (indices circleParamIndexX / circleParamIndexY).
float distanceToCenterCircle(float x, float y, const float *parametres)
{
	const float centreX = parametres[circleParamIndexX];
	const float centreY = parametres[circleParamIndexY];
	return sqrt((x-centreX)*(x-centreX)+(y-centreY)*(y-centreY));
}
// Squared signed distance between the point (x, y) and the circle described by
// parametres: (distance to centre - radius)^2.
float getSquareDistanceCircle(float x, float y, const float *parametres)
{
	const float distCentre = distanceToCenterCircle(x, y, parametres);
	const float ecart = distCentre - parametres[circleParamIndexR];
	return ecart * ecart;
}
// Partial derivative of the signed point-to-circle distance with respect to
// parameter number i (circleParamIndexX, circleParamIndexY or circleParamIndexR).
// D is the point-to-centre distance, precomputed by the caller.
// Returns 0 for any other value of i.
float deriveePartielleFastCircle(int i, float x, float y, const float *parametres, float D)
{
	const float centreX = parametres[circleParamIndexX];
	const float centreY = parametres[circleParamIndexY];
	const float rayon = parametres[circleParamIndexR];

	switch (i)
	{
		case circleParamIndexX:
			return deriveeXCercleFast(x, y, centreX, centreY, rayon, D);
		case circleParamIndexY:
			return deriveeYCercleFast(x, y, centreX, centreY, rayon, D);
		case circleParamIndexR:
			return deriveeRCercle(x, y, centreX, centreY, rayon);
		default:
			return 0;
	}
}
// Sum, over every grid cell whose value equals formId, of the squared distance
// between the cell coordinates (ix, iy) and the circle described by parametres.
// The grid holds nbCellsX * nbCellsY cells, addressed as iy*nbCellsX+ix.
float calculateSumMeanSquaresCircle(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY, const float *parametres)
{
	float total = 0;
	// Column-major traversal: the accumulation order determines float rounding.
	for (int col=0; col<nbCellsX; col++)
	{
		for (int row=0; row<nbCellsY; row++)
		{
			if (topologyGrid[row*nbCellsX+col] != formId)
				continue;
			total += getSquareDistanceCircle((float)col, (float)row, parametres);
		}
	}
	return total;
}
// Contribution of one point (x, y) to the 3x3 matrix of second derivatives,
// written row-major into H (9 floats).
// Only first derivatives are used: H[i][j] = 2 * d_i * d_j, an approximation
// that holds in the neighbourhood of the solution.
void hessienne(float x, float y, const float *parametres, float *H)
{
	const float distCentre = distanceToCenterCircle(x, y, parametres);

	float derivees[3] = {0};
	for (int k=0; k<3; k++)
		derivees[k] = deriveePartielleFastCircle(k, x, y, parametres, distCentre);

	// Compute each product once and mirror it across the diagonal.
	for (int grand=0; grand<3; grand++)
	{
		for (int petit=0; petit<=grand; petit++)
		{
			const float terme = 2 * derivees[petit] * derivees[grand];
			H[petit*3+grand] = terme;
			H[grand*3+petit] = terme;
		}
	}
}
// Accumulates into hess (9 floats, row-major 3x3) the per-point matrices
// returned by hessienne() for every grid cell whose value equals formId.
// hess is reset to zero first.
void hessienneComplete(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY, const float *parametres, float *hess)
{
	float contribution[9] = {0};

	for (int k=0; k<9; k++)
		hess[k] = 0;

	// Column-major traversal: the accumulation order determines float rounding.
	for (int col=0; col<nbCellsX; col++)
	{
		for (int row=0; row<nbCellsY; row++)
		{
			if (topologyGrid[row*nbCellsX+col] != formId)
				continue;

			hessienne((float)col, (float)row, parametres, contribution);
			for (int k=0; k<9; k++)
				hess[k] += contribution[k];
		}
	}
}
// Component i of the gradient of the squared point-to-circle distance:
// (partial derivative of the signed distance) * 2 * (signed distance).
// D is the point-to-centre distance, precomputed by the caller.
float gradiantFastCircle(int i, float x, float y, const float *parametres, float D)
{
	const float derivee = deriveePartielleFastCircle(i, x, y, parametres, D);
	const float doubleDistance = 2*distanceSigneeFastCircle(x, y, parametres, D);
	return derivee * doubleDistance;
}
// Accumulates into gradiant (3 floats) the gradient of the sum of squared
// distances, over every grid cell whose value equals formId.
// gradiant is reset to zero first.
void gradiantComplet(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY, const float *parametres, float *gradiant)
{
	for (int k=0; k<3; k++)
		gradiant[k] = 0;

	// Column-major traversal: the accumulation order determines float rounding.
	for (int col=0; col<nbCellsX; col++)
	{
		for (int row=0; row<nbCellsY; row++)
		{
			if (topologyGrid[row*nbCellsX+col] != formId)
				continue;

			// The distance to the centre is shared by the three components.
			const float distCentre = distanceToCenterCircle((float)col, (float)row, parametres);
			for (int k=0; k<3; k++)
				gradiant[k] += gradiantFastCircle(k, (float)col, (float)row, parametres, distCentre);
		}
	}
}
// Number of cells of the nbCellsX x nbCellsY grid whose value equals formId.
int nbPointsActifs(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY)
{
	int compte = 0;
	for (int col=0; col<nbCellsX; col++)
	{
		for (int row=0; row<nbCellsY; row++)
			compte += (topologyGrid[row*nbCellsX+col] == formId) ? 1 : 0;
	}
	return compte;
}
// Evaluates, for the circle described by parametres:
//   - the return value F: sum of squared distances over the cells equal to formId,
//   - G (3 floats)  = -0.5 * gradient of F,
//   - H (9 floats)  =  0.5 * approximated matrix of second derivatives of F.
float calculateFandGandHCircle(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY, const float *parametres, float *G, float *H)
{
	const float sommeCarres = calculateSumMeanSquaresCircle(topologyGrid, formId, nbCellsX, nbCellsY, parametres);

	// Gradient, then scaling by -0.5
	gradiantComplet(topologyGrid, formId, nbCellsX, nbCellsY, parametres, G);
	for (int k=0; k<3; k++)
		G[k] *= -0.5;

	// Second-derivative matrix, then scaling by 0.5
	hessienneComplete(topologyGrid, formId, nbCellsX, nbCellsY, parametres, H);
	for (int k=0; k<9; k++)
		H[k] *= 0.5;

	return sommeCarres;
}
// Fits a circle (centre x, centre y, radius) to the grid cells whose value
// equals formId, using the Levenberg-Marquardt method to minimise F, the sum of
// squared distances between the cells and the circle.
//
//   topologyGrid         grid of nbCellsX * nbCellsY cells, addressed iy*nbCellsX+ix
//   formId               value identifying the cells to fit
//   parametresInitiaux   starting estimate (3 floats, see circleParamIndexX/Y/R)
//   parametresOptimises  receives the final parameters (3 floats)
//
// Returns F divided by the number of matching cells, or infinity when fewer
// than 3 cells match (parametresOptimises is then left untouched).
//
// The loop always terminates: the iteration cap and the upper bound on lambda
// are checked whether or not the linear system could be solved.
float methodeLevenbergMarquardtCircle(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY,
	const float *parametresInitiaux, float *parametresOptimises)
{
	int nbPts = nbPointsActifs(topologyGrid, formId, nbCellsX, nbCellsY);
	if (nbPts < 3)
		return infinity;

	float dLambda = dLambdaInitial;
	float dLambdaMin = mathEpsilon;
	float dLambdaMax = infinity;
		
	float parametres[3] = {0};
	for (int i=0; i<3; i++)
		parametres[i] = parametresInitiaux[i];
			
	float deltas[3] = {0};
	
	int iter = 0;
	
	float tryParam[3] = {0};
		
	bool bStop = false;

	//-------------------------------------------------------------------
	//Compute F(initial a)
	//-------------------------------------------------------------------
	float F = calculateSumMeanSquaresCircle(topologyGrid, formId, nbCellsX, nbCellsY, parametres);
	float 	g[3] = {0};
	float 	H[9] = {0};
	float 	Hbarre[9] = {0};
	float	solution[3] = {0};
	
	//True when parametres changed since g and H were last evaluated
	bool bStateMoved = true;
	
	while (!bStop)
	{
		//-------------------------------------------------------------------
		//Compute g and Hbarre at point a
		//   g = -0.5 * gradient
		//   H = 0.5 * Hessian
		//   Hbarre(j,j) = H(j,j)*(1+lambda)
		//   Hbarre(j,k) = H(j,k)
		//g and H are only re-evaluated when the parameters have changed.
		//-------------------------------------------------------------------
		if (bStateMoved)
		{
			calculateFandGandHCircle(topologyGrid, formId, nbCellsX, nbCellsY, parametres, g, H);
			//Augmented Hessian (plain copy here)
			for (int k=0; k<9; k++)
				Hbarre[k] = H[k];
		}
		//Augmented Hessian: damp the diagonal with the current lambda
		for (int i=0; i<3; i++)
			Hbarre[i*3+i] = H[i*3+i]*(1+dLambda);
	
		//-------------------------------------------------------------------
		//Solve Hbarre.deltaA = g for deltaA
		//NOTE(review): solve() is defined elsewhere; it is assumed to return
		//false when the 3x3 system cannot be solved - confirm.
		//-------------------------------------------------------------------
		bool bOk = solve(Hbarre, g, solution, 3);
		if (bOk)
		{
			for (int i=0; i<3; i++)
				deltas[i] = solution[i];
				
			//A radius increment smaller than mathEpsilon in magnitude is
			//replaced by mathEpsilon
			if (fabs(deltas[circleParamIndexR]) < mathEpsilon)
				deltas[circleParamIndexR] = mathEpsilon;
					
			//-------------------------------------------------------------------
			//Compute F(a + deltaA)
			//-------------------------------------------------------------------
			for (int i=0; i<3; i++)
				tryParam[i] = parametres[i] + deltas[i];
					
			float newF = calculateSumMeanSquaresCircle(topologyGrid, formId, nbCellsX, nbCellsY, tryParam);
	
			//-------------------------------------------------------------------
			//If F(a + deltaA) >= F(a):
			//      lambda = lambdaScaleFactor * lambda   (step rejected)
			//Otherwise:
			//      lambda = lambda / lambdaScaleFactor
			//      a = a + deltaA                        (step accepted)
			//-------------------------------------------------------------------
			if (newF>=F)
			{
				dLambda *= lambdaScaleFactor;
				bStateMoved = false;
			}
			else
			{
				dLambda /= lambdaScaleFactor;
				bStateMoved = true;
				for (int i=0; i<3; i++)
					parametres[i] = tryParam[i];
			}
	
			//-------------------------------------------------------------------
			//Stop criteria
			//   - lambda left the range ]dLambdaMin, dLambdaMax[
			//   - relative variation of F: |newF - F| / newF < thresholdDeltaF
			//   - iteration cap reached
			//-------------------------------------------------------------------
			if ( (dLambda<=dLambdaMin) || (dLambda>=dLambdaMax) )		
				bStop = true;
			if (fabs(newF-F)/newF < thresholdDeltaF) 				
				bStop = true;
			if (iter>ITER_MAX) 										
				bStop = true;

			//-------------------------------------------------------------------
			//F follows the parameters: only update it when the step was accepted
			//-------------------------------------------------------------------
			if (bStateMoved) 
				F = newF;
		}
		else
		{
			//System not solvable with this damping: increase lambda and retry
			dLambda *= lambdaScaleFactor;
			bStateMoved = false; 

			//The stop criteria above are not reached on this path, so the
			//bounds that do not depend on newF are checked here as well;
			//otherwise repeated failures of solve() would loop forever.
			if ( (dLambda>=dLambdaMax) || (iter>ITER_MAX) )
				bStop = true;
		}
		iter++;
	}
	for (int i=0; i<3; i++)
		parametresOptimises[i] = parametres[i];
		
	float score = F/(float)nbPts;
	
	return score;
}
// Cheap rejection tests run before attempting a circle fit on the cells whose
// value equals formId.
//
// boundingBox (4 floats) is always overwritten with { xmin, ymin, xmax, ymax }
// of the matching cells, in cell coordinates; when no cell matches it keeps its
// initial values (+infinity for the minima, -infinity for the maxima).
//
// Returns false when:
//   - fewer than 3 cells match,
//   - nbPointsMinRequired > 0 and fewer cells than that match,
//   - nbPointsMaxAllowed > 0 and more cells than that match,
//   - radiusMaxAllowedPix > 0 and the larger side of the bounding box exceeds
//     2 * radiusMaxAllowedPix.
// Returns true otherwise.
bool passesEarlyOuts(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY, 
	const float radiusMaxAllowedPix, float *boundingBox, const int nbPointsMinRequired, const int nbPointsMaxAllowed)
{
	// Empty box: minima start at +infinity, maxima at -infinity (on purpose)
	boundingBox[0] = infinity;
	boundingBox[1] = infinity;
	boundingBox[2] = infinity_neg;
	boundingBox[3] = infinity_neg;

	int compte = 0;
	for (int col=0; col<nbCellsX; col++)
	{
		for (int row=0; row<nbCellsY; row++)
		{
			if (topologyGrid[row*nbCellsX+col] != formId)
				continue;

			compte++;
			if (col < boundingBox[0])
				boundingBox[0] = (float)col;
			if (row < boundingBox[1])
				boundingBox[1] = (float)row;
			if (col > boundingBox[2])
				boundingBox[2] = (float)col;
			if (row > boundingBox[3])
				boundingBox[3] = (float)row;
		}
	}

	// Cell-count limits
	const bool tropPeu = (compte < 3) || (nbPointsMinRequired > 0 && compte < nbPointsMinRequired);
	if (tropPeu)
		return false;
	const bool trop = (nbPointsMaxAllowed > 0 && compte > nbPointsMaxAllowed);
	if (trop)
		return false;

	// A box still holding one of its initial values is not usable
	const bool boiteVide = (boundingBox[0] == infinity) || (boundingBox[1] == infinity)
		|| (boundingBox[2] == infinity_neg) || (boundingBox[3] == infinity_neg);
	if (boiteVide)
		return false;

	// Extent limit: the form must fit in a square of side 2 * radiusMaxAllowedPix
	const float etendueX = boundingBox[2] - boundingBox[0];
	const float etendueY = boundingBox[3] - boundingBox[1];
	const float etendueMax = max(etendueX, etendueY);
	return !(radiusMaxAllowedPix > 0 && etendueMax >  2 * radiusMaxAllowedPix);
}
// Runs the circle fit on the cells whose value equals formId, after the early
// rejection tests of passesEarlyOuts().
// The starting estimate is the centre of the bounding box of those cells, with
// half of its larger side as radius.
// Returns the score of methodeLevenbergMarquardtCircle(), or infinity when the
// early tests fail (parametresOptimises is then left untouched).
float doCircleLevenbergMarquardt(uchar* topologyGrid, const uchar formId, const int nbCellsX, const int nbCellsY, const float radiusMaxAllowedPix,
	float *parametresOptimises, const int nbPointsMinRequired, const int nbPointsMaxAllowed)
{
	float boite[4];
	if (!passesEarlyOuts(topologyGrid, formId, nbCellsX, nbCellsY, radiusMaxAllowedPix, boite, nbPointsMinRequired, nbPointsMaxAllowed))
		return infinity;

	float parametresInitiaux[3];
	parametresInitiaux[circleParamIndexX] = 0.5*(boite[0]+boite[2]);
	parametresInitiaux[circleParamIndexY] = 0.5*(boite[1]+boite[3]);
	parametresInitiaux[circleParamIndexR] = 0.5*max(boite[2] - boite[0], boite[3] - boite[1]);

	return methodeLevenbergMarquardtCircle(topologyGrid, formId, nbCellsX, nbCellsY, parametresInitiaux, parametresOptimises);
}



// Rasterises the points of pointsBuffer that fall inside a slice into
// topologyGrid (nbCellsX * nbCellsY cells, addressed iRow*nbCellsX+iCol).
//
// The grid is first reset to 0. Each retained point is expressed as (u, v, w)
// through getLocalX/Y/Z (NOTE(review): presumably its coordinates in the frame
// described by cartesianSystem - confirm). It is kept when |u| and |v| are
// below sliceSize/2 and |w - offsetZ| is below sliceThickness/2; the cell
// round((u + sliceSize/2) / pixelDim), round((v + sliceSize/2) / pixelDim) is
// then set to 1, with pixelDim = sliceSize / nbCellsX. Points whose rounded
// cell falls outside the grid are ignored.
//
// Only one point out of `granularity` is examined, where
// granularity = nPoints / nbMaxPointsAllowed (at least 1). A non-positive
// nbMaxPointsAllowed disables the subsampling (every point is examined).
//
// Returns the number of distinct cells set to 1.
int fillTopologicalGrid(uchar *topologyGrid, const int nbCellsX, const int nbCellsY,
	global const float4 *pointsBuffer, const int nPoints, const int nbMaxPointsAllowed,
	const float *cartesianSystem, const float offsetZ, const float sliceSize, const float sliceThickness)
{
	int gridSize = nbCellsX * nbCellsY;
	float pixelDim = sliceSize / (float)nbCellsX;

	for (int k=0; k<gridSize; k++)
		topologyGrid[k] = 0;
	
	// Subsampling step. The division is skipped for nbMaxPointsAllowed <= 0:
	// a zero divisor would produce an infinite (or NaN) quotient whose
	// conversion to int is not defined; negative values already led to 1.
	int granularity = 1;
	if (nbMaxPointsAllowed > 0)
		granularity = (int)((float)nPoints / (float)nbMaxPointsAllowed);
	if (granularity < 1)
		granularity = 1;
	
	int nbCellsFilled = 0;
	for (int i=0; i<nPoints; i++)
	{
		if (i % granularity != 0)
			continue;

		float3 point = (float3)(pointsBuffer[i].x, pointsBuffer[i].y, pointsBuffer[i].z);
		float u = getLocalX(point, cartesianSystem);
		float v = getLocalY(point, cartesianSystem);
		float w = getLocalZ(point, cartesianSystem);
		if ((fabs(u) < 0.5*sliceSize) && (fabs(v) < 0.5*sliceSize) && (fabs(w-offsetZ) < 0.5*sliceThickness))
		{
			int iCol = (int)round((u+0.5*sliceSize)/pixelDim);
			int iRow = (int)round((v+0.5*sliceSize)/pixelDim);
			// Rounding can land one cell past the last row/column
			if (iCol<0 || iCol>=nbCellsX || iRow<0 || iRow>=nbCellsY)
				continue;
			int index = iRow * nbCellsX + iCol;
			// Count each cell only once
			if (topologyGrid[index] == 0)
			{
				topologyGrid[index] = 1;
				nbCellsFilled ++;
			}
		}
	}
	return nbCellsFilled;
}
// Detects circles in one slice of a point cloud and writes the result record
// into clResultBuffer, starting at startIndex.
//
// Steps:
//   1. the CIRCLE_DETECTION_RESULT_SIZE values of the record are set to -1;
//   2. the points are rasterised into topologyGrid (fillTopologicalGrid);
//   3. the grid is split into forms by identifyDistinctFormsInGrid_max254Forms
//      (defined elsewhere), whose return value is used as the number of forms
//      and stored in the record header;
//   4. a circle fit is run on each form, ids starting at TOPOLOGY_FORM_START_ID;
//   5. a fit is retained when its score is in [0, scoreThresholdPix[, its radius
//      is in ]0, radiusMaxAllowed[ (no upper bound when radiusMaxAllowed <= 0)
//      and, when aboutThisRadius > 0, within 20% of aboutThisRadius;
//   6. when bPassingThruOrigin is set, circles lying more than 5 cells away
//      from the grid centre are discarded (they are still counted in the header);
//   7. at most CIRCLE_DETECTION_NB_RESULTS_MAX circles are stored; once the
//      record is full, a new circle replaces the stored one with the highest
//      score value if its own score value is lower.
//
// The header finally receives the number of fits that passed step 5.
void detectCirclesInSlice(uchar *topologyGrid, const int nbCellsX, const int nbCellsY,
	global const float4 *pointsBuffer, const int nPoints, const int nbMaxPointsAllowed,
	const float *cartesianSystem, const float offsetZ, const float sliceSize, const float sliceThickness,
	const float aboutThisRadius, const float radiusMaxAllowed, 
	global float *clResultBuffer, const int startIndex, const float scoreThresholdPix, 
	bool bPassingThruOrigin)
{
	// Mark the whole record as empty
	for (int k=0; k<CIRCLE_DETECTION_RESULT_SIZE; k++)
		clResultBuffer[startIndex + k] = -1;

	// Rasterise the points, then label the distinct forms of the grid
	fillTopologicalGrid(topologyGrid, nbCellsX, nbCellsY,
		pointsBuffer, nPoints, nbMaxPointsAllowed, cartesianSystem, offsetZ, sliceSize, sliceThickness);
	const int nbForms = identifyDistinctFormsInGrid_max254Forms(topologyGrid, nbCellsX, nbCellsY);
	clResultBuffer[startIndex + RESULT_INDEX_HEADER_NBFORMS] = (float)nbForms;

	const int centreCol = (int)((nbCellsX-1)/2);
	const int centreRow = (int)((nbCellsY-1)/2);

	// Size of one grid cell, used to convert between cells and slice units
	const float pixelDim = sliceSize / (float)nbCellsX;

	// Expected radius in cells; -1 means "no expectation"
	float aboutThisRadiusPix = (float)aboutThisRadius / (float)pixelDim;
	if (aboutThisRadiusPix<=0)
		aboutThisRadiusPix = -1;

	// Largest radius in cells; infinity means "no limit"
	float radiusMaxAllowedPix = (float)radiusMaxAllowed / (float)pixelDim;
	if (radiusMaxAllowedPix<=0)
		radiusMaxAllowedPix = infinity;

	const int firstEntry = startIndex + CIRCLE_DETECTION_RESULT_HEADER_SIZE;

	int nbStored = 0;
	int nbDetected = 0;
	float fit[3];

	for (int iForm=0; iForm<nbForms; iForm++)
	{
		for (int k=0; k<3; k++)
			fit[k] = 0;
		const uchar formId = (uchar)(TOPOLOGY_FORM_START_ID+iForm);

		// Levenberg-Marquardt fit on this form
		const float scorePix = doCircleLevenbergMarquardt(topologyGrid, formId, nbCellsX, nbCellsY, radiusMaxAllowedPix, 
					fit, CIRCLE_DETECTION_NB_POINTS_MIN, CIRCLE_DETECTION_NB_POINTS_MAX);
		const float radiusPix = fit[circleParamIndexR];

		const bool bScoreOk = (scorePix >=0) && (scorePix < scoreThresholdPix);
		if (!bScoreOk)
			continue;

		bool bRadiusOk = (radiusPix > 0) && (radiusPix < radiusMaxAllowedPix);
		if (bRadiusOk && aboutThisRadiusPix != -1)
			bRadiusOk = fabs(radiusPix-aboutThisRadiusPix) < 0.2 * aboutThisRadiusPix;
		if (!bRadiusOk)
			continue;

		nbDetected ++;

		const float cupix = fit[circleParamIndexX];
		const float cvpix = fit[circleParamIndexY];

		// The origin of the cartesian system (grid centre) is expected to lie
		// on the circle: reject circles passing too far from it
		if (bPassingThruOrigin)
		{
			const float distToCirclePix = sqrt((cupix - centreCol) * (cupix - centreCol) + (cvpix - centreRow) * (cvpix - centreRow)) - radiusPix;
			const int offsetPixAllowed = 5;
			if (fabs(distToCirclePix) > offsetPixAllowed)
				continue;
		}

		// Score converted from cells^2 to slice units^2
		const float score = pixelDim * pixelDim * scorePix;

		// Choose the entry to write: next free one, or the stored circle with
		// the highest score value when the new circle does better
		int slot = -1;
		if (nbStored < CIRCLE_DETECTION_NB_RESULTS_MAX)
		{
			slot = nbStored;
			nbStored++;
		}
		else
		{
			int worstSlot = -1;
			float worstScore = -1;
			for (int k=0; k<CIRCLE_DETECTION_NB_RESULTS_MAX; k++)
			{
				const float stored = clResultBuffer[firstEntry + CIRCLE_DETECTION_RESULT_UNIT_SIZE*k + RESULT_INDEX_SCORE];
				if (stored > worstScore)
				{
					worstSlot = k;
					worstScore = stored;
				}
			}
			if (worstSlot != -1 && score < worstScore)
				slot = worstSlot;
		}
		if (slot == -1)
			continue;

		const int i0 = firstEntry + CIRCLE_DETECTION_RESULT_UNIT_SIZE*slot;

		// Circle centre: cells -> slice frame -> getGlobalX/Y/Z
		const float localCx = cupix * pixelDim - 0.5*sliceSize;
		const float localCy = cvpix * pixelDim - 0.5*sliceSize;
		float3 localC = (float3)(localCx, localCy, offsetZ);

		const float globalCx = getGlobalX(localC, cartesianSystem);
		const float globalCy = getGlobalY(localC, cartesianSystem);
		const float globalCz = getGlobalZ(localC, cartesianSystem);

		clResultBuffer[i0 + RESULT_INDEX_CU] = cupix;
		clResultBuffer[i0 + RESULT_INDEX_CV] = cvpix;
		clResultBuffer[i0 + RESULT_INDEX_CN] = offsetZ;
		clResultBuffer[i0 + RESULT_INDEX_CX] = globalCx;
		clResultBuffer[i0 + RESULT_INDEX_CY] = globalCy;
		clResultBuffer[i0 + RESULT_INDEX_CZ] = globalCz;
		clResultBuffer[i0 + RESULT_INDEX_R] = pixelDim * radiusPix;
		clResultBuffer[i0 + RESULT_INDEX_SCORE] = score;
		clResultBuffer[i0 + RESULT_INDEX_NBCELLS] =  nbPointsActifs(topologyGrid, formId, nbCellsX, nbCellsY);
	}
	clResultBuffer[startIndex + RESULT_INDEX_HEADER_NBCIRCLES] = (float)nbDetected;
}