// Square search window of half-size 10: width is 2*10+1 cells, surface is width squared.
// NOTE(review): no use of these three constants is visible in this part of the file.
constant int searchHalfSize = 10;
constant int searchSize = 2*10+1;
constant int searchSurface = (2*10+1)*(2*10+1);

// Same three quantities for the half-size-7 window scanned by computeDualNormal.
constant int dualNormalSearchHalfSize = 7;
constant int dualNormalSearchSize = 2*7+1;
constant int dualNormalSearchSurface = (2*7+1)*(2*7+1);

// Large finite value used as an "infinite" sentinel (initial score in getNormalBestFit).
constant float infinity = 100000000000000.f;

// (dx, dy) offsets of the four neighbour cells inspected by identifyDistinctForms
// for each cell: the three cells of column x-1 and the cell (x, y-1).
constant int previousCellOffsetSize = 4;
constant int previousCellOffsetX[4] = { -1, -1, -1,  0 };
constant int previousCellOffsetY[4] = { -1,  0,  1, -1 };


// Maps the angle pair (thetaX, thetaY) to the unit-sphere point
//   ( cos(thetaX)*cos(thetaY), sin(thetaX)*cos(thetaY), sin(thetaY) ).
float3 sphericalCoordinatesToCartesianCordinatesOnUnitSphere(float thetaX, float thetaY)
{
	const float cosX = (float)cos(thetaX);
	const float sinX = (float)sin(thetaX);
	const float cosY = (float)cos(thetaY);
	const float sinY = (float)sin(thetaY);

	return (float3)(cosX*cosY, sinX*cosY, sinY);
}

/*float3 getDepthBufferLocalCoordinates(const uchar4 argb, int iH, int iV, int W, int H)
{

	int compR = convert_int(argb.z);
	int compG = convert_int(argb.y);
	int compB = convert_int(argb.x);
	
	
	int R100 = (int)(compR/100);
	float depth = (float)(R100*256*256 + compG*256 + compB)/1000.f;	
	if (depth==0.0f) 
	{
		return (float3)(0.0f,0.0f,0.0f);
	}
	
	//return (float3)(1.0f,0.0f,0.0f);
	
	
	//   - let (cA, rA) be the angular coordinates in the [W, H] matrix (cA relates to theta, rA relates to phi)
	//   - iCol = integer part of cA
	//   - iRow = integer part of rA
	//   - let cA_tilde = cA-iCol	=> by construction, 0 <= cA_tilde < 1
	//   - let rA_tilde = rA-iRow	=> by construction, 0 <= rA_tilde < 1
	//   - the following are stored (integer parts only):
	//      - 10*cA_tilde
	//      - 10*rA_tilde
	//   - storage: R10 = 10*cA_tilde
	//   - storage: R1  = 10*rA_tilde

	int R10 	= (int)((compR-100*R100)/10.f);
	int R1 		= compR-100*R100-10*R10;

	float cA_tilde = (float)R10/10.f;
	float rA_tilde = (float)R1/10.f;

	float iHDouble = iH + cA_tilde;
	if (iHDouble>=W) iHDouble = iHDouble-W;
	float iVDouble = iV + rA_tilde;

	float thetaX = 2.f*M_PI_F*(W-1.f-iHDouble)/W;
	float thetaY = -M_PI_F/2.f + M_PI_F*(H-1.f-iVDouble)/H;
	
	float3 localCoordsReal = sphericalCoordinatesToCartesianCordinatesOnUnitSphere(thetaX, thetaY);
	float xReal = localCoordsReal.x;
	float yReal = localCoordsReal.y;
	float zReal = localCoordsReal.z;

	float3 localCoords = (float3)(-depth*xReal, depth*zReal, depth*yReal);

	return localCoords;
	
}*/

// Component-wise arithmetic mean of the first nbVals entries of values.
// The sum is divided by nbVals without a zero check.
float4 getMeanValue_float4(const float4* values, const int nbVals)
{
	float4 sum = (float4)(0);

	int idx = 0;
	while (idx < nbVals)
	{
		sum += values[idx];
		++idx;
	}

	sum /= nbVals;
	return sum;
}

	
// Component-wise arithmetic mean (barycenter) of the first nbVals points.
// The sum is divided by nbVals without a zero check.
float3 getMeanValue(const float3* values, const int nbVals)
{
	float3 sum = (float3)(0.f, 0.f, 0.f);

	int idx = 0;
	while (idx < nbVals)
	{
		sum += values[idx];
		++idx;
	}

	sum /= nbVals;
	return sum;
}

// Component-wise mean of the points whose activeValues entry equals 1.
// Returns (0,0,0) when no entry is active.
float3 getActiveMeanValue(const float3* values, const int* activeValues, const int nbVals)
{
	float3 sum = (float3)(0.f, 0.f, 0.f);
	int activeCount = 0;

	for (int idx=0; idx<nbVals; ++idx)
	{
		if (activeValues[idx]!=1)
		{
			continue;
		}
		sum += values[idx];
		++activeCount;
	}

	if (activeCount==0)
	{
		return sum;
	}
	sum /= activeCount;
	return sum;
}
// Fills m (9 floats, row-major 3x3) with the averaged scatter matrix of the
// sample points around samplePointsAverage = (a, b, c):
//
//   m[r*3+c] = (1/nbVals) * sum_i (p_i[r] - avg[r]) * (p_i[c] - avg[c])
//
// i.e. m00 = mean of (xi-a)^2, m01 = m10 = mean of (xi-a)(yi-b), and so on.
// The matrix is symmetric; only the upper triangle is accumulated and the
// lower triangle is mirrored from it. The division by nbVals is unguarded.
void getOrthogonalRegressionEnergyFunction(const float3* samplePoints, const int nbVals, float3 samplePointsAverage, float* m) 
{
	const float a = samplePointsAverage.x;
	const float b = samplePointsAverage.y;
	const float c = samplePointsAverage.z;

	// Upper-triangle accumulators.
	float sxx = 0.f;
	float sxy = 0.f;
	float sxz = 0.f;
	float syy = 0.f;
	float syz = 0.f;
	float szz = 0.f;

	for (int idx=0; idx<nbVals; idx++)
	{
		const float xi = samplePoints[idx].x;
		const float yi = samplePoints[idx].y;
		const float zi = samplePoints[idx].z;

		sxx += (xi-a)*(xi-a);
		sxy += (xi-a)*(yi-b);
		sxz += (xi-a)*(zi-c);
		syy += (yi-b)*(yi-b);
		syz += (yi-b)*(zi-c);
		szz += (zi-c)*(zi-c);
	}

	const float mxx = sxx / nbVals;
	const float mxy = sxy / nbVals;
	const float mxz = sxz / nbVals;
	const float myy = syy / nbVals;
	const float myz = syz / nbVals;
	const float mzz = szz / nbVals;

	m[0*3+0] = mxx;	m[0*3+1] = mxy;	m[0*3+2] = mxz;
	m[1*3+0] = mxy;	m[1*3+1] = myy;	m[1*3+2] = myz;
	m[2*3+0] = mxz;	m[2*3+1] = myz;	m[2*3+2] = mzz;
}




// Symmetric Householder reduction to tridiagonal form.
//
// Works on a fixed 3x3 problem (the literal 3 is the matrix order).
//   V : 9 floats, indexed V[row*3+col]. Read as the input matrix and overwritten
//       in place; after the "Accumulate transformations" stage it holds the
//       accumulated transformation that tql2 continues to update.
//   d : 3 floats, written with the diagonal of the tridiagonal form.
//   e : 3 floats, written with the off-diagonal elements; e[0] is set to 0 and
//       e[i] (i>0) belongs to row i (tql2 shifts them down by one on entry).
// NOTE(review): the caller in this file passes a symmetric matrix; behaviour
// for non-symmetric input is not established by the code shown here.
void tred2 (float* V, float* d, float* e) 
{
	//  This is derived from the Algol procedures tred2 by
   	//  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
   	//  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
   	//  Fortran subroutine in EISPACK.

	// Start from the last row of V.
	for (int j = 0; j < 3; j++) 
	{
		d[j] = V[(3-1)*3+j];
	}

	// Householder reduction to tridiagonal form.
	for (int i = 3-1; i > 0; i--) 
	{
		// Scale to avoid under/overflow.
		float scale = 0.0f;
		float h = 0.0f;
		for (int k = 0; k < i; k++) 
		{
			scale = scale + fabs(d[k]);
		}
		if (scale == 0.0f) 
		{
            // Row is already zero left of the diagonal: no reflection needed.
            e[i] = d[i-1];
            for (int j = 0; j < i; j++) {
               d[j] = V[(i-1)*3+j];
               V[i*3+j] = 0.0f;
               V[j*3+i] = 0.0f;
            }
         } 
         else 
         {
            // Generate Householder vector.
            for (int k = 0; k < i; k++) 
            {
               d[k] /= scale;
               h += d[k] * d[k];
            }
            float f = d[i-1];
            float g = sqrt(h);
            // Give g the opposite sign of f so that f - g does not cancel.
            if (f > 0) 
            {
               g = -g;
            }
            e[i] = scale * g;
            h = h - f * g;
            d[i-1] = f - g;
            for (int j = 0; j < i; j++) 
            {
               e[j] = 0.0f;
            }
   
            // Apply similarity transformation to remaining columns.
   
            for (int j = 0; j < i; j++) 
            {
               f = d[j];
               V[j*3+i] = f;
               g = e[j] + V[j*3+j] * f;
               for (int k = j+1; k <= i-1; k++) 
               {
                  g += V[k*3+j] * d[k];
                  e[k] += V[k*3+j] * f;
               }
               e[j] = g;
            }
            f = 0.0f;
            for (int j = 0; j < i; j++) 
            {
               e[j] /= h;
               f += e[j] * d[j];
            }
            float hh = f / (h + h);
            for (int j = 0; j < i; j++) 
            {
               e[j] -= hh * d[j];
            }
            for (int j = 0; j < i; j++) 
            {
               f = d[j];
               g = e[j];
               for (int k = j; k <= i-1; k++) {
                  V[k*3+j] -= (f * e[k] + g * d[k]);
               }
               // Load the next row to be reduced and clear the processed one.
               d[j] = V[(i-1)*3+j];
               V[i*3+j] = 0.0f;
            }
         }
         // h is kept in d[i] for the accumulation stage below.
         d[i] = h;
      }
   
      // Accumulate transformations.
   
      for (int i = 0; i < 3-1; i++) 
      {
         // The last row of V is used as temporary storage for the diagonal.
         V[(3-1)*3+i] = V[i*3+i];
         V[i*3+i] = 1.0f;
         float h = d[i+1];
         if (h != 0.0f) 
         {
            for (int k = 0; k <= i; k++) 
            {
               d[k] = V[k*3+i+1] / h;
            }
            for (int j = 0; j <= i; j++) 
            {
               float g = 0.0f;
               for (int k = 0; k <= i; k++) 
               {
                  g += V[k*3+i+1] * V[k*3+j];
               }
               for (int k = 0; k <= i; k++) 
               {
                  V[k*3+j] -= g * d[k];
               }
            }
         }
         for (int k = 0; k <= i; k++) 
         {
            V[k*3+i+1] = 0.0f;
         }
      }
      // Move the diagonal saved in the last row of V into d, then finish V.
      for (int j = 0; j < 3; j++) 
      {
         d[j] = V[(3-1)*3+j];
         V[(3-1)*3+j] = 0.0f;
      }
      V[(3-1)*3+3-1] = 1.0f;
      e[0] = 0.0f;
} 

//Computes (a2 + b2)1/2 without destructive underflow or overflow.
float pythag(float a, float b)
{
 	float r;
    if (fabs(a) > fabs(b)) 
    {
		r = b/a;
        r = fabs(a)*sqrt(1.f+r*r);
    } 
    else if (b != 0) 
    {
		r = a/b;
        r = fabs(b)*sqrt(1.f+r*r);
    } 
    else 
    {
		r = 0.0f;
    }
    return r;
} 
 
// Symmetric tridiagonal QL algorithm.
void tql2 (float* V, float* d, float* e) 
{
	int n = 3;
   	//  This is derived from the Algol procedures tql2, by
   	//  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
   	//  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
   	//  Fortran subroutine in EISPACK.
	for (int i = 1; i < n; i++) 
	{
		e[i-1] = e[i];
    }
    e[n-1] = 0.0f;
   
    float f = 0.0f;
    float tst1 = 0.0f;
    float eps = (float)pow(2.0f,-52.0f);
    
    for (int l = 0; l < n; l++) 
    {
         // Find small subdiagonal element
         tst1 = max(tst1,fabs(d[l]) + fabs(e[l]));
         int m = l;
         while (m < n) {
            if (fabs(e[m]) <= eps*tst1) {
               break;
            }
            m++;
         }
   
         // If m == l, d[l] is an eigenvalue,
         // otherwise, iterate.
   
         if (m > l) 
         {
            int iter = 0;
            do 
            {
               iter = iter + 1;  // (Could check iteration count here.)
   
               // Compute implicit shift
               float g = d[l];
               float p = (d[l+1] - g) / (2.0f * e[l]);
               float r = pythag(p,1.0f);
               if (p < 0) {
                  r = -r;
               }
               d[l] = e[l] / (p + r);
               d[l+1] = e[l] * (p + r);
               float dl1 = d[l+1];
               float h = g - d[l];
               for (int i = l+2; i < n; i++) 
               {
                  d[i] -= h;
               }
               f = f + h;
   
               // Implicit QL transformation.
               p = d[m];
               float c = 1.0f;
               float c2 = c;
               float c3 = c;
               float el1 = e[l+1];
               float s = 0.0f;
               float s2 = 0.0f;
               for (int i = m-1; i >= l; i--) 
               {
                  c3 = c2;
                  c2 = c;
                  s2 = s;
                  g = c * e[i];
                  h = c * p;
                  r = pythag(p,e[i]);
                  e[i+1] = s * r;
                  s = e[i] / r;
                  c = p / r;
                  p = c * d[i] - s * g;
                  d[i+1] = h + s * (c * g + s * d[i]);
   
                  // Accumulate transformation.
                  for (int k = 0; k < n; k++) 
                  {
                     h = V[k*3+i+1];
                     V[k*3+i+1] = s * V[k*3+i] + c * h;
                     V[k*3+i] = c * V[k*3+i] - s * h;
                  }
               }
               p = -s * s2 * c3 * el1 * e[l] / dl1;
               e[l] = s * p;
               d[l] = c * p;
   
               // Check for convergence.
            } 
            while (fabs(e[l]) > eps*tst1);
         }
         d[l] = d[l] + f;
         e[l] = 0.0f;
      }
     
      // Sort eigenvalues and corresponding vectors.
      for (int i = 0; i < n-1; i++) 
      {
         int k = i;
         float p = d[i];
         for (int j = i+1; j < n; j++) 
         {
            if (d[j] < p) 
            {
               k = j;
               p = d[j];
            }
         }
         if (k != i) 
         {
            d[k] = d[i];
            d[i] = p;
            for (int j = 0; j < n; j++) 
            {
               p = V[j*+i];
               V[j*3+i] = V[j*3+k];
               V[j*3+k] = p;
            }
         }
      }
 }
  
// Eigen-decomposition driver for a 3x3 matrix stored as 9 floats.
//   A : input matrix (not modified).
//   V : receives a copy of A, then is transformed in place by tred2 and tql2.
//   d, e : 3-float work/output arrays handed to tred2 and tql2.
void EigenvalueDecomposition_Symetric3x3(const float* A, float* V, float* d, float* e) 
{	
	// Work on a copy so that A is left untouched.
	for (int idx = 0; idx < 3*3; idx++) 
	{
		V[idx] = A[idx];
	}

	// Tridiagonalize, then diagonalize.
	tred2(V, d, e);
	tql2(V, d, e); 
}

// Fits a plane normal to a set of points by orthogonal regression.
//
// The normal is the eigenvector of the points' scatter matrix (around their
// barycenter) associated with the smallest eigenvalue, normalised to unit
// length when its norm is non-zero.
//
// Returns (nx, ny, nz, score), where score is the variance of the points'
// projections onto the normal. With nbVals <= 0 there is nothing to fit and
// the initial value (0, 0, 0, infinity) is returned.
float4 getNormalBestFit(const float3* points, const int nbVals)
{
	float4 bestNormal = (float4)(0.f,0.f,0.f,infinity);

	if (nbVals<=0)
	{
		return bestNormal;
	}

	//--------------------------------------------------------------
	//Get the average of the sample points
	//--------------------------------------------------------------
	float3 barycenter = getMeanValue(points, nbVals);

	//--------------------------------------------------------------
	//Get the energy function
	//--------------------------------------------------------------
	float energyMatrix[9] = {0};
	getOrthogonalRegressionEnergyFunction(points, nbVals, barycenter, energyMatrix);

	//--------------------------------------------------------------
	//Get the smallest eigenvalue
	//--------------------------------------------------------------
	float eigenvectors[9] = {0};
	float eigenvalues[3] = {0};
	float e[3] = {0};
	
	EigenvalueDecomposition_Symetric3x3(energyMatrix, eigenvectors, eigenvalues, e);

	// Compare as float: an int here would truncate the eigenvalues toward zero
	// and make the comparisons below pick the wrong index.
	int indSmallestValue = 0;
	float smallestVal = eigenvalues[0];
	for (int k=1; k<3; k++)
	{
		if (eigenvalues[k]<smallestVal) 
		{
			smallestVal = eigenvalues[k];
			indSmallestValue = k;
		}
	}

	//--------------------------------------------------------------
	//Get the normal (= eigenvector corresponding to smallest eigenvalue)
	//Eigenvectors are the columns of the row-major 3x3 array.
	//--------------------------------------------------------------
	bestNormal.x = eigenvectors[0*3+indSmallestValue];
	bestNormal.y = eigenvectors[1*3+indSmallestValue];
	bestNormal.z = eigenvectors[2*3+indSmallestValue];
	
	float norm = sqrt(bestNormal.x*bestNormal.x + bestNormal.y*bestNormal.y +bestNormal.z*bestNormal.z);
	if (norm!=0.0f) 
	{
		bestNormal.x /= norm;
		bestNormal.y /= norm;
		bestNormal.z /= norm;
	}
	
	//--------------------------------------------------------------
	//Get the score: variance of the projections onto the normal
	//--------------------------------------------------------------
	float scalN_mean = 0;
	float scalN;
	int i;
	for (i=0; i<nbVals; i++)
	{
		float3 P = points[i];
		scalN = P.x*bestNormal.x+P.y*bestNormal.y+P.z*bestNormal.z;
		scalN_mean += scalN;
	}
	scalN_mean/=nbVals;
	
	float scoreGlobal = 0;
	float d2;
	for (i=0; i<nbVals; i++)
	{
		float3 P = points[i];
		scalN = P.x*bestNormal.x+P.y*bestNormal.y+P.z*bestNormal.z;
		d2 = (scalN - scalN_mean)*(scalN - scalN_mean);
		scoreGlobal += d2;
	}	
	scoreGlobal /= nbVals;
	bestNormal.w = scoreGlobal;
	
	return bestNormal;
}
// Labels the connected groups ("forms") of cells whose topologyMask value is 1.
//
// Both arrays hold nbCellsX*nbCellsY ints indexed [iy*nbCellsX+ix].
// topologyIds is first cleared to 0; afterwards every masked cell carries a
// positive label, and masked cells linked through the neighbour offsets
// previousCellOffsetX/Y share the same label. Unmasked cells keep 0.
//
// The grid is scanned column by column. Each masked cell looks at its four
// already-scanned neighbours: with none masked it opens a new label; otherwise
// it takes the first label found, and every other label found is merged into
// that one by relabelling the whole grid.
void identifyDistinctForms(const int* topologyMask, int* topologyIds, const int nbCellsX, const int nbCellsY)
{
	for (int iy=0; iy<nbCellsY; iy++)
	{
		for (int ix=0; ix<nbCellsX; ix++)
		{
			topologyIds[iy*nbCellsX+ix] = 0;
		}
	}

	int lastLabel = 0;	// most recent label handed out

	for (int ix=0; ix<nbCellsX; ix++)
	{
		for (int iy=0; iy<nbCellsY; iy++)
		{
			const int cell = iy*nbCellsX+ix;
			if (topologyMask[cell]!=1)
			{
				continue;
			}

			// Distinct labels carried by the masked neighbours, in discovery order.
			int seenLabels[4] = { 0, 0, 0, 0 };
			int nbSeen = 0;

			for (int n=0; n<4; n++)
			{
				const int nx = ix+previousCellOffsetX[n];
				const int ny = iy+previousCellOffsetY[n];
				if ((nx<0)||(nx>=nbCellsX)||(ny<0)||(ny>=nbCellsY))
				{
					continue;
				}

				const int neighCell = ny*nbCellsX + nx;
				if (topologyMask[neighCell]!=1)
				{
					continue;
				}

				const int neighLabel = topologyIds[neighCell];
				int alreadySeen = 0;
				for (int k=0; k<nbSeen; k++)
				{
					if (seenLabels[k]==neighLabel)
					{
						alreadySeen = 1;
						break;
					}
				}
				if (alreadySeen==0)
				{
					seenLabels[nbSeen] = neighLabel;
					nbSeen++;
				}
			}

			int label;
			if (nbSeen==0)
			{
				// Isolated so far: open a new form.
				lastLabel++;
				label = lastLabel;
			}
			else
			{
				label = seenLabels[0];
				// This cell bridges several forms: fold the others into the first.
				for (int k=1; k<nbSeen; k++)
				{
					const int mergedLabel = seenLabels[k];
					for (int iy2=0; iy2<nbCellsY; iy2++)
					{
						for (int ix2=0; ix2<nbCellsX; ix2++)
						{
							if (topologyIds[iy2*nbCellsX+ix2]==mergedLabel)
							{
								topologyIds[iy2*nbCellsX+ix2] = label;
							}
						}
					}
				}
			}
			topologyIds[cell] = label;
		}
	}
}

// Returns the angle pair (thetaX, thetaY) of point P = (xReal, yReal, zReal)
// as seen from centre C = (CxReal, CyReal, CzReal).
//
//   CP = P - C
//   QP = (CP . vertical) * vertical      component of CP along the vertical
//   CQ = CP - QP                         remaining component
//
//   thetaX in [0, 2*PI): angle of (CQx, CQy); 0 when CQ is the null vector.
//   thetaY in [-PI/2, PI/2]: asin(QPz / |CP|); 0 when P coincides with C.
//
// The arguments of asin are clamped to [-1, 1]: rounding in the norm can push
// the ratios just past 1, for which asin would return NaN.
//
// The axisX* and axisY* parameters are accepted but never read.
// NOTE(review): thetaX uses the raw x/y components of CQ and thetaY the raw z
// component of QP, which looks like it assumes vertical = (0,0,1) and the
// canonical X/Y axes (what the caller in this file passes) - confirm before
// calling with another frame.
float2 getSphericalCoordinates(	float xReal, float yReal, float zReal,
								float CxReal, float CyReal, float CzReal, 
								float axisXxReal, float axisXyReal, float axisXzReal, 
								float axisYxReal, float axisYyReal, float axisYzReal, 
								float verticalxReal, float verticalyReal, float verticalzReal)
{
	float CPx = xReal-CxReal;
	float CPy = yReal-CyReal;
	float CPz = zReal-CzReal;
	float scal = CPx*verticalxReal+CPy*verticalyReal+CPz*verticalzReal;
	float QPx = scal*verticalxReal;
	float QPy = scal*verticalyReal;
	float QPz = scal*verticalzReal;
	float CQx = CPx-QPx;
	float CQy = CPy-QPy;
	float CQz = CPz-QPz;
	float normCQ = sqrt(CQx*CQx+CQy*CQy+CQz*CQz);
	float normCP = sqrt(CPx*CPx+CPy*CPy+CPz*CPz);
	
	float thetaX = 0;
	if (normCQ!=0)
	{
		float sinThetaX = CQy/normCQ;
		if (sinThetaX>1.f)  sinThetaX = 1.f;
		if (sinThetaX<-1.f) sinThetaX = -1.f;

		// asin only covers [-PI/2, PI/2]; the sign of CQx selects the quadrant.
		thetaX = asin(sinThetaX);
		if (CQx>0)
		{
			if (thetaX<0) thetaX = 2.f*M_PI_F + thetaX;
		}
		else if (CQx<0)
		{
			thetaX = M_PI_F - thetaX;
		}
		else
		{
			if (CQy>=0) 	thetaX = 0.5f*M_PI_F;
			else 			thetaX = 3.f*M_PI_F/2.f;
		}
	}

	float thetaY = 0;
	if (normCP!=0)
	{
		float sinThetaY = QPz/normCP;
		if (sinThetaY>1.f)  sinThetaY = 1.f;
		if (sinThetaY<-1.f) sinThetaY = -1.f;
		thetaY = asin(sinThetaY);
	}

	float2 sphericalCoords = (float2)(thetaX, thetaY);
	return sphericalCoords;
}


// Packs a normal and its fit score into 8 bytes.
//
//   s0..s2 : theta in milli-degrees, base-256 digits, least significant first
//   s3     : 255 - scoreInt, with scoreInt = round(log10(1/normalScore))
//            clamped to [0, 255]
//   s4..s6 : (phi + PI/2) in milli-degrees, same digit layout
//   s7     : 255
//
// The angles come from getSphericalCoordinates applied to (-nx, nz, ny).
//
// The score is clamped while still a float. Converting an out-of-range float
// (e.g. +inf for normalScore == 0) to int is not well defined, and a negative
// value would wrap in the uchar and decode as a high score. A NaN score maps
// to scoreInt = 0.
uchar8 motorLocalNormalInfo2uchar(float nxMotorLocal, float nyMotorLocal, float nzMotorLocal, float normalScore)
{
	float scoreLog = round(log10(1.f/normalScore));
	int scoreInt = 0;
	if (scoreLog>=255.f)
	{
		scoreInt = 255;
	}
	else if (scoreLog>0.f)
	{
		scoreInt = (int)scoreLog;
	}

	float2 sphericalCoord = getSphericalCoordinates(-nxMotorLocal, nzMotorLocal, nyMotorLocal, 0, 0, 0, 1.f, 0, 0, 0, 1.f, 0, 0, 0, 1.f);
	float theta_0_2PI = sphericalCoord.x;
	float phi_minusHalfPI_halfPI = sphericalCoord.y;
	float phi_0_PI = phi_minusHalfPI_halfPI+0.5f*M_PI_F;

	int thetaIntMilliDeg = (int)(1000*(float)theta_0_2PI*(float)180/(float)M_PI_F);
	int phiIntMilliDeg   = (int)(1000*(float)phi_0_PI*(float)180/(float)M_PI_F);

	// Split each angle into three base-256 digits.
	int theta256x256 = (int)(thetaIntMilliDeg/(256*256));		
	int theta256 	 = (int)((thetaIntMilliDeg-256*256*theta256x256)/256);
	int theta1 		 = thetaIntMilliDeg-256*theta256-256*256*theta256x256;

	int phi256x256 	 = (int)(phiIntMilliDeg/(256*256));		
	int phi256 	     = (int)((phiIntMilliDeg-256*256*phi256x256)/256);
	int phi1 		 = phiIntMilliDeg-256*phi256-256*256*phi256x256;

	uchar8 MotorLocalNormalInfo = (uchar8)(0);
	
	MotorLocalNormalInfo.s3 = (uchar)(255-scoreInt);
	MotorLocalNormalInfo.s2 = (uchar)(theta256x256);
	MotorLocalNormalInfo.s1 = (uchar)(theta256);
	MotorLocalNormalInfo.s0 = (uchar)(theta1);

	MotorLocalNormalInfo.s7 = (uchar)(255);
	MotorLocalNormalInfo.s6 = (uchar)(phi256x256);
	MotorLocalNormalInfo.s5 = (uchar)(phi256);
	MotorLocalNormalInfo.s4 = (uchar)(phi1);
	
	return MotorLocalNormalInfo;
}

// Decodes the 8-byte packing written by motorLocalNormalInfo2uchar.
//
// Returns (nx, ny, nz, scoreLog) with scoreLog = 255 - s3. When scoreLog is 0
// the all-zero vector is returned without decoding the angles. s7 is not read.
float4 uchar2MotorLocalNormalInfo(uchar8 MotorLocalNormalInfo)
{
	const int scoreLog = 255-convert_int(MotorLocalNormalInfo.s3);
	if (scoreLog==0)
	{
		return (float4)(0);
	}

	// Rebuild both angles (milli-degrees) from their three base-256 digits.
	const int thetaHi  = convert_int(MotorLocalNormalInfo.s2);
	const int thetaMid = convert_int(MotorLocalNormalInfo.s1);
	const int thetaLo  = convert_int(MotorLocalNormalInfo.s0);
	const int phiHi    = convert_int(MotorLocalNormalInfo.s6);
	const int phiMid   = convert_int(MotorLocalNormalInfo.s5);
	const int phiLo    = convert_int(MotorLocalNormalInfo.s4);

	const int thetaIntMilliDeg = (thetaHi<<16) + (thetaMid<<8) + thetaLo;
	const int phiIntMilliDeg   = (phiHi<<16) + (phiMid<<8) + phiLo;

	// Back to radians; phi was stored shifted by +PI/2.
	const float thetaX = thetaIntMilliDeg*M_PI_F/(float)(180*1000);
	const float thetaY = -0.5f*M_PI_F + phiIntMilliDeg*M_PI_F/(float)(180*1000);

	const float3 unitPoint = sphericalCoordinatesToCartesianCordinatesOnUnitSphere(thetaX, thetaY);

	// Undo the (-x, z, y) axis permutation applied by the encoder.
	float4 normalInfo = (float4)(0);
	normalInfo.x = -unitPoint.x;
	normalInfo.y =  unitPoint.z;
	normalInfo.z =  unitPoint.y;
	normalInfo.w = scoreLog;
	return normalInfo;
}

// Reads and decodes the normal stored for pixel (gx, gy).
//
// The buffer holds two stacked images of width W: rows [0, H) carry the theta
// digits and the score byte, rows [H, 2H) carry the phi digits. The fourth
// component of the phi pixel is not used.
// Returns the float4 produced by uchar2MotorLocalNormalInfo.
float4 getNormalInfo(global const uchar4 *normals, int gx, int gy, int W, int H)
{
	const uchar4 thetaAndScore = normals[W*gy+gx];
	const uchar4 phi = normals[W*(gy+H)+gx];

	uchar8 packed = (uchar8)(0);
	packed.s0 = thetaAndScore.x;
	packed.s1 = thetaAndScore.y;
	packed.s2 = thetaAndScore.z;
	packed.s3 = thetaAndScore.w;
	packed.s4 = phi.x;
	packed.s5 = phi.y;
	packed.s6 = phi.z;

	return uchar2MotorLocalNormalInfo(packed);
}

// Maps a (column, row) position from a source image to a destination image of
// different size. Each image has a margin on both horizontal sides; the
// margin-free height is taken as half the margin-free width.
//
// The position is rescaled and rounded to the nearest cell. A row landing on
// the destination height is pulled back to the last row; a column landing on
// the destination width wraps to column 0.
// Returns (destination column including margin, destination row).
int2 srcColRow2destColRow_Panoramic(int srcCol, int srcRow, int srcImageMargin, int srcWidthIncludingMargin, int destImageMargin, int destWidthIncludingMargin)
{
	const int srcInnerW  = srcWidthIncludingMargin-2*srcImageMargin;
	const int srcInnerH  = (int)(srcInnerW/2);
	const int destInnerW = destWidthIncludingMargin-2*destImageMargin;
	const int destInnerH = (int)(destInnerW/2);

	const int srcInnerCol = srcCol-srcImageMargin;

	int destRow = (int)round((float)srcRow*(float)destInnerH/(float)srcInnerH);
	if (destRow==destInnerH)
	{
		destRow = destInnerH-1;
	}

	int destCol = (int)round((float)srcInnerCol*(float)destInnerW/(float)srcInnerW);
	if (destCol==destInnerW)
	{
		destCol = 0;
	}

	return (int2)(destCol+destImageMargin, destRow);
}
// Counts, in the (2*searchHalfSize+1)^2 window centred on (gx, gy), the pixels
// whose decoded normal has w != 255, and among those the "outliers" whose
// |dot product| with normalInfo is below normalDotThreshold.
//
// Columns wrap around the image width W; rows outside [0, W/2) are skipped.
// Returns (number of counted normals, number of outliers).
int2 getNbNeighborsAndOutliers_normals(global const uchar4 *normals, int gx, int gy, int W, int searchHalfSize, float4 normalInfo, float normalDotThreshold)
{
	const int H = (int)(W/2);

	int counted = 0;
	int outliers = 0;

	for (int dc=-searchHalfSize; dc<=searchHalfSize; dc++)
	{
		// Horizontal wrap-around.
		int col = gx+dc;
		if (col<0) col+=W;
		if (col>=W) col-=W;

		for (int dr=-searchHalfSize; dr<=searchHalfSize; dr++)
		{
			const int row = gy+dr;
			if ((row<0)||(row>=H))
			{
				continue;
			}

			const float4 neighborInfo = getNormalInfo(normals, col, row, W, H);
			if (neighborInfo.w==255)
			{
				continue;
			}

			counted++;
			const float alignment = fabs(dot(neighborInfo.xyz, normalInfo.xyz));
			if (alignment<normalDotThreshold)
			{
				outliers++;
			}
		}
	}

	return (int2)(counted, outliers);
}
// Kernel: flags pixels that lie in a locally flat area.
//
// One work-item per pixel (gx, gy) of an image of width W and height W/2.
// results[W*gy+gx] is set to 1 when
//   - the pixel's local coordinates are not (0,0,0),
//   - its decoded normal has w != 255, and
//   - in the surrounding window of half-size flatAreaSearchHalfSize, the number
//     of outlier normals (|dot| < normalDotThreshold) is below
//     ratioNbNormalOutliers times the number of counted normals.
// In every other case results is left untouched.
// getDepthBufferLocalCoordinates is not defined in the visible part of this
// file (only a commented-out copy is present).
kernel void identifyFlatAreas(global const uchar4 *src, global const uchar4 *normals, __global int *results, int W, int flatAreaSearchHalfSize, float normalDotThreshold, float ratioNbNormalOutliers)
{
	const int gx = get_global_id(0);
	const int gy = get_global_id(1);
	const int H = (int)(W/2);
	const int pixel = W*gy+gx;

	const float3 localCoords = getDepthBufferLocalCoordinates(src[pixel], gx, gy, W, H);
	const float4 normalInfo = getNormalInfo(normals, gx, gy, W, H);

	const int hasPoint = (localCoords.x!=0.f)||(localCoords.y!=0.f)||(localCoords.z!=0.f);
	if (!hasPoint)
	{
		return;
	}
	if (normalInfo.w==255)
	{
		return;
	}

	const int2 counts = getNbNeighborsAndOutliers_normals(normals, gx, gy, W, flatAreaSearchHalfSize, normalInfo, normalDotThreshold);
	const int nbNormals = counts.x;
	const int nbNormalOutliers = counts.y;

	if (nbNormalOutliers<ratioNbNormalOutliers*nbNormals)
	{
		results[pixel] = 1;
	}
}


// Kernel: computes a "dual normal" for every non-flat pixel.
//
// One work-item per pixel (gx, gy) of an image of width W and height H = W/2.
// A pixel is processed only if its local coordinates are not (0,0,0), its
// decoded normal has w != 255 and flatFlags is 0 for it. Steps:
//   1. Gather the positions and normals of the dualNormalSearchSize^2 window
//      around the pixel (columns wrap around W, rows outside [0,H) are skipped).
//   2. Flag window cells that have at least one of their 8 neighbours closer
//      than 2 * pixelDimension_1meter * (distance of the cell's point to the
//      origin); the centre cell is always flagged.
//   3. Label connected groups of flagged cells (identifyDistinctForms) and keep
//      the cells sharing the centre cell's label that have both a position and
//      a valid normal.
//   4. If more than fillRatioThreshold * dualNormalSearchSurface cells remain,
//      fit a plane normal through the kept normals and their opposites
//      (getNormalBestFit). When round(log10(1/score)), capped at 255, reaches
//      scoreThreshold, the result is oriented so that its dot product with the
//      pixel's local coordinates is not negative and written, packed by
//      motorLocalNormalInfo2uchar, to rows gy and gy+H of results.
// Pixels failing any test leave results untouched.
//
// Normal validity is tracked in normalNeighborsGrid_flags. The normal grid is
// zero-initialised, so testing the stored w against 255 cannot tell "no valid
// normal" (stored w == 0) from a real entry.
//
// getDepthBufferLocalCoordinates is not defined in the visible part of this
// file (only a commented-out copy is present).
kernel void computeDualNormal(global const uchar4 *src, global const uchar4 *normals, global const int *flatFlags, __global uchar4 *results, int W, int scoreThreshold, float fillRatioThreshold)
{
	float theta_1pixel = (float)(2.f*M_PI_F/(float)W);
	float pixelDimension_1meter = 2.f*tan(0.5f*theta_1pixel);


	int gx = get_global_id(0);
	int gy = get_global_id(1);
	uchar4 argb = src[W*gy+gx];
	int isFlat = flatFlags[W*gy+gx];
	
	
	int H = (int)(W/2);
	
	
	float3 localCoords = getDepthBufferLocalCoordinates(argb, gx, gy, W, H);
	float x = localCoords.x;
	float y = localCoords.y;
	float z = localCoords.z;

	float4 normalInfo = getNormalInfo(normals, gx, gy, W, H);

	if ( (((x!=0.f)||(y!=0.f)||(z!=0.f))) && (normalInfo.w!=255) && (isFlat==0) )
	{
		float4 normalNeighborsGrid[dualNormalSearchSize*dualNormalSearchSize] = {0};
		// 1 where normalNeighborsGrid holds a valid (w != 255) normal.
		int normalNeighborsGrid_flags[dualNormalSearchSize*dualNormalSearchSize] = {0};
		
		float4 normalInfoNeighbor;
	
		uchar4 argbNeigh;
		float3 neighbor;
		float3 neighborsGrid[dualNormalSearchSize*dualNormalSearchSize] = {0};
		int neighborsGrid_flags[dualNormalSearchSize*dualNormalSearchSize] = {0};
		int neighborsGrid_topologyIds[dualNormalSearchSize*dualNormalSearchSize] = {0};
		float3 pivot;
		
		int ic, ir;
		int iCol;
		int iRow;
		int coldum, rowdum;
		float dist;
		int gridIndex;
	
		// Step 1: gather positions and normals of the window.
		for (ic=-dualNormalSearchHalfSize; ic<=dualNormalSearchHalfSize; ic++)
		{
			iCol = gx+ic;
			if (iCol<0) iCol+=W;
			if (iCol>=W) iCol-=W;
			for (ir=-dualNormalSearchHalfSize; ir<=dualNormalSearchHalfSize; ir++)
			{
				iRow = gy+ir;
				if ((iRow>=0)&&(iRow<H))
				{
					gridIndex = (dualNormalSearchHalfSize+ir)*dualNormalSearchSize + dualNormalSearchHalfSize+ic;

					normalInfoNeighbor = getNormalInfo(normals, iCol, iRow, W, H);
					if (normalInfoNeighbor.w!=255)
					{
						normalNeighborsGrid[gridIndex] = normalInfoNeighbor;
						normalNeighborsGrid_flags[gridIndex] = 1;
					}

					argbNeigh = src[W*iRow+iCol];
					neighbor = getDepthBufferLocalCoordinates(argbNeigh, iCol, iRow, W, H);
					if ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f))
					{
						neighborsGrid[gridIndex] = neighbor;
					}
				}
			}
		}
		

		// Step 2: flag cells that have a spatially close 8-neighbour.
		neighborsGrid_flags[(dualNormalSearchHalfSize)*dualNormalSearchSize+dualNormalSearchHalfSize] = 1;
		int bFoundSpatialNeighbor;
		float pivotDistToCenter;
		for (iRow=0; iRow<dualNormalSearchSize; iRow++)
		{
			for (iCol=0; iCol<dualNormalSearchSize; iCol++)
			{
				pivot = neighborsGrid[iRow*dualNormalSearchSize+iCol];
				
				pivotDistToCenter = sqrt(pivot.x*pivot.x+pivot.y*pivot.y+pivot.z*pivot.z);

				bFoundSpatialNeighbor = 0;
				if ((pivot.x!=0.f)||(pivot.y!=0.f)||(pivot.z!=0.f))
				{
					for (ir=-1; ir<=1; ir++)
					{
						rowdum = iRow+ir;
						if ((rowdum>=0)&&(rowdum<dualNormalSearchSize))
						{
							for (ic=-1; ic<=1; ic++)
							{
								if ((ir!=0)||(ic!=0))
								{
									coldum = iCol+ic;
									if ((coldum>=0)&&(coldum<dualNormalSearchSize))
									{
										neighbor = neighborsGrid[rowdum*dualNormalSearchSize+coldum];
										if ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f))
										{
											dist = sqrt((pivot.x-neighbor.x)*(pivot.x-neighbor.x)+(pivot.y-neighbor.y)*(pivot.y-neighbor.y)+(pivot.z-neighbor.z)*(pivot.z-neighbor.z));
											if (dist<2.f*pixelDimension_1meter*pivotDistToCenter)
											{
												neighborsGrid_flags[iRow*dualNormalSearchSize+iCol] = 1;
												bFoundSpatialNeighbor = 1;
												break;
											}
										}
									}
								}
							}
						}
						// The inner break only leaves the column loop; leave the row loop too.
						if (bFoundSpatialNeighbor==1)
						{
							break;
						}
					}
				}
			}
		}

		// Step 3: keep the cells connected to the centre cell.
		identifyDistinctForms(neighborsGrid_flags, neighborsGrid_topologyIds, dualNormalSearchSize, dualNormalSearchSize);
		int centricFormId = neighborsGrid_topologyIds[(dualNormalSearchHalfSize)*dualNormalSearchSize+dualNormalSearchHalfSize];
		

		float3 normalVirtualPoints[2*dualNormalSearchSurface] = {0};
		int nbNeighbors = 0;
		int nbVirtualPoints = 0;
		for (int i=0; i<dualNormalSearchSize*dualNormalSearchSize; i++)
		{
			if (neighborsGrid_topologyIds[i]==centricFormId)
			{
			 	neighbor = neighborsGrid[i];
			 	if ( ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f)) && (normalNeighborsGrid_flags[i]==1) )
			 	{
			 		normalInfoNeighbor = normalNeighborsGrid[i];
			 		nbNeighbors++;
			 		// Each normal is added with its opposite, so the fitted
			 		// point set is symmetric about the origin.
			 		normalVirtualPoints[nbVirtualPoints] = (float3)(normalInfoNeighbor.x, normalInfoNeighbor.y, normalInfoNeighbor.z);
			 		nbVirtualPoints++;
			 		normalVirtualPoints[nbVirtualPoints] = (float3)(-normalInfoNeighbor.x, -normalInfoNeighbor.y, -normalInfoNeighbor.z);
			 		nbVirtualPoints++;						
			 	}
			}
		}
		
		// Step 4: fit, score, orient and store.
		int nbNeighborsMin = (int)(fillRatioThreshold*dualNormalSearchSurface);
		
		if (nbNeighbors>nbNeighborsMin)
		{
			float4 dualNormal = getNormalBestFit(normalVirtualPoints, nbVirtualPoints);
			float scoreLog = round(log10(1.f/dualNormal.w));
			// Cap before converting: scoreLog is +inf when the fit score is 0.
			int scoreInt = (scoreLog>=255.f) ? 255 : (int)scoreLog;
			if (scoreInt>=scoreThreshold)
			{
				float3 localDualN = dualNormal.xyz;
				if (dot(localDualN, localCoords)<0.f)
				{
					localDualN.x = -localDualN.x;
					localDualN.y = -localDualN.y;
					localDualN.z = -localDualN.z;
				}
				uchar8 dualNormalInfo = motorLocalNormalInfo2uchar(localDualN.x, localDualN.y, localDualN.z, dualNormal.w);
	  			results[W*gy+gx].x = dualNormalInfo.s0; 		//BLUE
	  			results[W*gy+gx].y = dualNormalInfo.s1; 		//GREEN
	  			results[W*gy+gx].z = dualNormalInfo.s2; 		//RED
	  			results[W*gy+gx].w = dualNormalInfo.s3; 		//ALPHA	
	  		
	  			results[W*(gy+H)+gx].x = dualNormalInfo.s4; 	//BLUE
	  			results[W*(gy+H)+gx].y = dualNormalInfo.s5; 	//GREEN
	  			results[W*(gy+H)+gx].z = dualNormalInfo.s6; 	//RED
	  			results[W*(gy+H)+gx].w = dualNormalInfo.s7; 	//ALPHA	
			}
		}
	}
}


// Kernel: filters the dual normals by local consistency.
//
// One work-item per pixel (gx, gy) of an image of width W and height H = W/2.
// For a pixel whose decoded dual normal has w != 255, the window of half-size
// dualNormalQualityCheckHalfSize is examined. If the number of outliers
// (|dot| < dualNormalDotThreshold) is below ratioNbDualNormalOutliers times
// the number of counted normals, both encoded pixels (rows gy and gy+H) are
// copied to dualNormalsOptimized; otherwise both are written as 0.
// Pixels with w == 255 leave dualNormalsOptimized untouched.
kernel void checkQualityDualNormal(global const uchar4 *dualNormals, __global uchar4 *dualNormalsOptimized, int W, int dualNormalQualityCheckHalfSize, float dualNormalDotThreshold, float ratioNbDualNormalOutliers)
{
	const int gx = get_global_id(0);
	const int gy = get_global_id(1);
	const int H = (int)(W/2);

	const int thetaPixel = W*gy+gx;		// theta digits + score byte
	const int phiPixel   = W*(gy+H)+gx;	// phi digits

	const float4 dualNormalInfo = getNormalInfo(dualNormals, gx, gy, W, H);
	if (dualNormalInfo.w==255)
	{
		return;
	}

	const int2 counts = getNbNeighborsAndOutliers_normals(dualNormals, gx, gy, W, dualNormalQualityCheckHalfSize, dualNormalInfo, dualNormalDotThreshold);
	const int nbDualNormals = counts.x;
	const int nbDualNormalOutliers = counts.y;

	if (nbDualNormalOutliers<ratioNbDualNormalOutliers*nbDualNormals)
	{
		// Consistent with its neighbourhood: keep as is.
		dualNormalsOptimized[thetaPixel] = dualNormals[thetaPixel];
		dualNormalsOptimized[phiPixel]   = dualNormals[phiPixel];
	}
	else
	{
		// Too many disagreeing neighbours: erase.
		dualNormalsOptimized[thetaPixel] = (uchar4)(0);
		dualNormalsOptimized[phiPixel]   = (uchar4)(0);
	}
}





















/*
 * Legacy dual-normal kernel (see the _OLD suffix).
 *
 * One work-item handles the pixel (gx, gy) = (get_global_id(0), get_global_id(1)).
 * Steps, as implemented below:
 *   1. Decode the pixel position (getDepthBufferLocalCoordinates) and its
 *      normal (getNormalInfo). Nothing is done when the position is (0,0,0)
 *      or when the normal's w component equals 255.
 *   2. Gather a dualNormalSearchSize x dualNormalSearchSize window of
 *      neighbor positions and normals around the pixel. Columns wrap around
 *      the image width, rows outside [0, H) are skipped (H = W/2).
 *   3. Flag every window cell that has at least one of its 8 window
 *      neighbors closer than 2 * pixelDimension_1meter * |cell position|.
 *   4. Label the flagged cells with identifyDistinctForms() and keep only
 *      the cells that carry the same label as the central cell.
 *   5. If more than 80% of the window was kept, run a flatness test on the
 *      kept normals; when the patch is not flat, fit a direction with
 *      getNormalBestFit() on the kept normals and their opposites, and, if
 *      the fit score is high enough, write the encoded result into
 *      results at rows gy and gy+H.
 *
 * src      encoded depth image
 * normals  encoded normal image
 * results  output image; two texels are written per accepted pixel
 * W        image width in pixels
 *
 * NOTE(review): 255 in the w component looks like an "invalid normal"
 * sentinel -- confirm against getNormalInfo().
 */
kernel void computeDualNormal_OLD(global const uchar4 *src, global const uchar4 *normals, __global uchar4 *results, int W)
{
	// Angular width of one pixel for a 2*pi panorama that is W pixels wide.
	float theta_1pixel = (float)(2.f*M_PI_F/(float)W);
	// Chord subtended by one pixel at unit distance; scaled later by the point's distance.
	float pixelDimension_1meter = 2.f*tan(0.5f*theta_1pixel);


	int gx = get_global_id(0);
	int gy = get_global_id(1);
	uchar4 argb = src[W*gy+gx];
	
	int H = (int)(W/2);
	
	
	float3 localCoords = getDepthBufferLocalCoordinates(argb, gx, gy, W, H);
	float x = localCoords.x;
	float y = localCoords.y;
	float z = localCoords.z;

	float4 normalInfo = getNormalInfo(normals, gx, gy, W, H);

	// Process only pixels with a non-null position and a normal whose w is not 255.
	if ( (((x!=0.f)||(y!=0.f)||(z!=0.f))) && (normalInfo.w!=255) )
	{
		float4 normalNeighborsGrid[dualNormalSearchSize*dualNormalSearchSize] = {0};
		// NOTE(review): the two arrays below are never read or written after initialization.
		int normalNeighborsGrid_flags[dualNormalSearchSize*dualNormalSearchSize] = {0};
		int normalNeighborsGrid_topologyIds[dualNormalSearchSize*dualNormalSearchSize] = {0};
		
		float4 normalInfoNeighbor;
		
		
		uchar4 argbNeigh;
		float3 neighbor;
		float3 neighborsGrid[dualNormalSearchSize*dualNormalSearchSize] = {0};
		int neighborsGrid_flags[dualNormalSearchSize*dualNormalSearchSize] = {0};
		int neighborsGrid_topologyIds[dualNormalSearchSize*dualNormalSearchSize] = {0};
		float3 pivot;
		
		
		
		
		int ic, ir;
		int iCol;
		int iRow;
		int coldum, rowdum;
		float dist;
	
	
		// NOTE(review): this test repeats the enclosing condition and is always true here.
		if (normalInfo.w!=255)
		{
			// Fill the window: grid index = (halfSize+ir)*size + (halfSize+ic).
			for (ic=-dualNormalSearchHalfSize; ic<=dualNormalSearchHalfSize; ic++)
			{
				// Columns wrap around the image width.
				iCol = gx+ic;
				if (iCol<0) iCol+=W;
				if (iCol>=W) iCol-=W;
				for (ir=-dualNormalSearchHalfSize; ir<=dualNormalSearchHalfSize; ir++)
				{
					// Rows do not wrap: out-of-range rows leave the cell at zero.
					iRow = gy+ir;
					if ((iRow>=0)&&(iRow<H))
					{
						normalInfoNeighbor = getNormalInfo(normals, iCol, iRow, W, H);
						if (normalInfoNeighbor.w!=255)
						{
							normalNeighborsGrid[(dualNormalSearchHalfSize+ir)*dualNormalSearchSize + dualNormalSearchHalfSize+ic] = normalInfoNeighbor;
						}
	
						argbNeigh = src[W*iRow+iCol];
						neighbor = getDepthBufferLocalCoordinates(argbNeigh, iCol, iRow, W, H);
						if ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f))
						{
							neighborsGrid[(dualNormalSearchHalfSize+ir)*dualNormalSearchSize + dualNormalSearchHalfSize+ic] = neighbor;
						}
					}
				}
			}
		}
		

		// The central cell is always eligible.
		neighborsGrid_flags[(dualNormalSearchHalfSize)*dualNormalSearchSize+dualNormalSearchHalfSize] = 1;
		int bFoundSpatialNeighbor;
		float pivotDistToCenter;
		// Eligibility pass: a cell is flagged when one of its 8 window neighbors is spatially close.
		for (iRow=0; iRow<dualNormalSearchSize; iRow++)
		{
			for (iCol=0; iCol<dualNormalSearchSize; iCol++)
			{
				pivot = neighborsGrid[iRow*dualNormalSearchSize+iCol];
				
				// Norm of the cell position; it scales the per-pixel footprint below.
				pivotDistToCenter = sqrt(pivot.x*pivot.x+pivot.y*pivot.y+pivot.z*pivot.z);

				bFoundSpatialNeighbor = 0;
				if ((pivot.x!=0.f)||(pivot.y!=0.f)||(pivot.z!=0.f))
				{
					for (ir=-1; ir<=1; ir++)
					{
						rowdum = iRow+ir;
						if ((rowdum>=0)&&(rowdum<dualNormalSearchSize))
						{
							for (ic=-1; ic<=1; ic++)
							{
								// Skip the cell itself.
								if ((ir!=0)||(ic!=0))
								{
									coldum = iCol+ic;
									if ((coldum>=0)&&(coldum<dualNormalSearchSize))
									{
										neighbor = neighborsGrid[rowdum*dualNormalSearchSize+coldum];
										if ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f))
										{
											dist = sqrt((pivot.x-neighbor.x)*(pivot.x-neighbor.x)+(pivot.y-neighbor.y)*(pivot.y-neighbor.y)+(pivot.z-neighbor.z)*(pivot.z-neighbor.z));
											// Close enough: within twice the one-pixel footprint at the pivot's distance.
											if (dist<2.f*pixelDimension_1meter*pivotDistToCenter)
											{
												neighborsGrid_flags[iRow*dualNormalSearchSize+iCol] = 1;
												bFoundSpatialNeighbor = 1;
												break;
											}
										}
									}
								}
							}
						}
						// Propagate the inner break: one close neighbor is enough.
						if (bFoundSpatialNeighbor==1)
						{
							break;
						}
					}
				}
			}
		}
		// Label the flagged cells, then read the label of the central cell.
		identifyDistinctForms(neighborsGrid_flags, neighborsGrid_topologyIds, dualNormalSearchSize, dualNormalSearchSize);
		int centricFormId = neighborsGrid_topologyIds[(dualNormalSearchHalfSize)*dualNormalSearchSize+dualNormalSearchHalfSize];
		
		// At least 80% of the window must belong to the central form.
		int nbNeighborsMin = (int)(0.8f*dualNormalSearchSurface);
		
		/* A good normal quality is required for the flatness test */
		int scoreThresholdForFlatTest = 4;
		
		int nbNeighborNormalsThresholdForFlatTest = (int)(0.8f*nbNeighborsMin);
	
		// NOTE(review): neighbors[] is filled below but never read afterwards.
		float3 neighbors[dualNormalSearchSurface] = {0};
		float4 neighborNormals[dualNormalSearchSurface] = {0};
		// Holds every kept normal together with its opposite, hence twice the window size.
		float3 normalVirtualPoints[2*dualNormalSearchSurface] = {0};
		int nbNeighborNormals = 0;
		int nbNeighbors = 0;
		int nbVirtualPoints = 0;
		for (int i=0; i<dualNormalSearchSize*dualNormalSearchSize; i++)
		{
			// Keep only the cells that carry the same label as the central cell.
			if (neighborsGrid_topologyIds[i]==centricFormId)
			{
			 	neighbor = neighborsGrid[i];
			 	normalInfoNeighbor = normalNeighborsGrid[i];
			 	
			 	if ( ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f)) && (normalInfoNeighbor.w!=255) )
			 	//if ( ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f)) )
			 	{
					// Only normals with w >= scoreThresholdForFlatTest take part in the flatness test.
					if (normalInfoNeighbor.w>=scoreThresholdForFlatTest)
					{
						neighborNormals[nbNeighborNormals] = (float4)(normalInfoNeighbor.x, normalInfoNeighbor.y, normalInfoNeighbor.z, normalInfoNeighbor.w);
						nbNeighborNormals++;
					}
			 		neighbors[nbNeighbors].xyz = neighbor.xyz;
			 		nbNeighbors++;
			 		// Store the normal and its opposite as two points for the fit.
			 		normalVirtualPoints[nbVirtualPoints] = (float3)(normalInfoNeighbor.x, normalInfoNeighbor.y, normalInfoNeighbor.z);
			 		nbVirtualPoints++;
			 		normalVirtualPoints[nbVirtualPoints] = (float3)(-normalInfoNeighbor.x, -normalInfoNeighbor.y, -normalInfoNeighbor.z);
			 		nbVirtualPoints++;						
			 	}
			}
		}
		
		if (nbNeighbors>nbNeighborsMin)
		{
			// Flatness test: the patch is flat when every qualifying normal is (anti)parallel to the mean normal.
			int isFlat = 0;
			if (nbNeighborNormals>nbNeighborNormalsThresholdForFlatTest)
			{
				isFlat = 1;
				float4 meanNormal = getMeanValue_float4(neighborNormals, nbNeighborNormals);
				float norm = sqrt(meanNormal.x*meanNormal.x+meanNormal.y*meanNormal.y+meanNormal.z*meanNormal.z);
				// NOTE(review): no guard against norm == 0 before this division.
				meanNormal /= norm;
				//float flatScalThershold = 0.9999f;
				float flatScalThershold = 0.99f;
				for (int i=0; i<nbNeighborNormals; i++)
				{
					float scal = neighborNormals[i].x*meanNormal.x+neighborNormals[i].y*meanNormal.y+neighborNormals[i].z*meanNormal.z;
					// A single normal deviating from the mean makes the patch non-flat.
					if (fabs(scal)<flatScalThershold)
					{
						isFlat = 0;
						break;
					}
				}
			}
	
			// Flat patches produce no output; only non-flat patches get a fitted direction.
			if (isFlat==0)
			{
				float4 dualNormal = getNormalBestFit(normalVirtualPoints, nbVirtualPoints);
				// Score = rounded log10 of 1/w, capped at 255.
				// NOTE(review): w is presumably the fit residual -- confirm in getNormalBestFit().
				float scoreLog = round(log10(1.f/dualNormal.w));
				int scoreInt = min((int)scoreLog, 255);	
				int scoreThreshold = 4;
				if (scoreInt>=scoreThreshold)
				{
					
					// Orient the result so that its dot product with the pixel position is non-negative.
					float3 localDualN = dualNormal.xyz;
					if (dot(localDualN, localCoords)<0.f)
					{
						localDualN.x = -localDualN.x;
						localDualN.y = -localDualN.y;
						localDualN.z = -localDualN.z;
					}
					// The encoded result spans 8 bytes: 4 go to row gy, 4 to row gy+H.
					uchar8 dualNormalInfo = motorLocalNormalInfo2uchar(localDualN.x, localDualN.y, localDualN.z, dualNormal.w);
		  			results[W*gy+gx].x = dualNormalInfo.s0; 		//BLUE
		  			results[W*gy+gx].y = dualNormalInfo.s1; 		//GREEN
		  			results[W*gy+gx].z = dualNormalInfo.s2; 		//RED
		  			results[W*gy+gx].w = dualNormalInfo.s3; 		//ALPHA	
		  		
		  			results[W*(gy+H)+gx].x = dualNormalInfo.s4; 	//BLUE
		  			results[W*(gy+H)+gx].y = dualNormalInfo.s5; 	//GREEN
		  			results[W*(gy+H)+gx].z = dualNormalInfo.s6; 	//RED
		  			results[W*(gy+H)+gx].w = dualNormalInfo.s7; 	//ALPHA	
				}
			}
		
		
		

		}
	}
}








/*
 * Per-pixel normal estimation on an encoded depth image.
 *
 * One work-item handles the pixel (get_global_id(0), get_global_id(1)).
 * Pixels whose decoded position is (0,0,0) are ignored. For the others:
 *   1. a searchSize x searchSize window of decoded positions is gathered
 *      (columns wrap around the width, rows outside [0, H) are skipped,
 *      H = W/2);
 *   2. each non-null window cell is marked eligible when one of its
 *      8 window neighbors lies closer than
 *      2 * pixelFootprint * |cell position|; the central cell is always
 *      eligible;
 *   3. identifyDistinctForms() labels the eligible cells and only cells
 *      sharing the central cell's label are kept;
 *   4. when more than 80% of the window was kept, getNormalBestFit() is run
 *      on the kept positions, the result is flipped so that its dot product
 *      with the pixel position is not positive, encoded with
 *      motorLocalNormalInfo2uchar() and stored in results at rows gy and
 *      gy+H.
 *
 * src      encoded depth image
 * results  output image; two texels are written per accepted pixel
 * W        image width in pixels
 */
kernel void computeNormal(global const uchar4 *src, __global uchar4 *results, int W)
{
	const int H = W/2;
	const int px = get_global_id(0);
	const int py = get_global_id(1);

	const uchar4 centerTexel = src[W*py+px];
	const float3 center = getDepthBufferLocalCoordinates(centerTexel, px, py, W, H);
	if ((center.x==0.f)&&(center.y==0.f)&&(center.z==0.f))
	{
		return;
	}

	/* Chord subtended by one pixel at unit distance, for a 2*pi panorama of width W. */
	const float pixelAngle = (float)(2.f*M_PI_F/(float)W);
	const float pixelFootprint = 2.f*tan(0.5f*pixelAngle);
	const float linkScale = 2.f*pixelFootprint;

	float3 grid[searchSize*searchSize] = {0};
	int eligible[searchSize*searchSize] = {0};
	int formIds[searchSize*searchSize] = {0};
	const int centerCell = (searchHalfSize)*searchSize+searchHalfSize;

	/* ---- Gather the window of decoded positions ---- */
	for (int dRow=-searchHalfSize; dRow<=searchHalfSize; dRow++)
	{
		const int srcRow = py+dRow;
		if ((srcRow<0)||(srcRow>=H))
		{
			continue;	/* rows do not wrap */
		}
		for (int dCol=-searchHalfSize; dCol<=searchHalfSize; dCol++)
		{
			int srcCol = px+dCol;
			if (srcCol<0) srcCol+=W;
			if (srcCol>=W) srcCol-=W;

			const uchar4 texel = src[W*srcRow+srcCol];
			const float3 pt = getDepthBufferLocalCoordinates(texel, srcCol, srcRow, W, H);
			if ((pt.x!=0.f)||(pt.y!=0.f)||(pt.z!=0.f))
			{
				grid[(searchHalfSize+dRow)*searchSize + searchHalfSize+dCol] = pt;
			}
		}
	}

	/* ---- Eligibility: a cell needs one spatially close 8-neighbor ---- */
	eligible[centerCell] = 1;
	for (int gRow=0; gRow<searchSize; gRow++)
	{
		for (int gCol=0; gCol<searchSize; gCol++)
		{
			const float3 pivot = grid[gRow*searchSize+gCol];
			if ((pivot.x==0.f)&&(pivot.y==0.f)&&(pivot.z==0.f))
			{
				continue;
			}
			const float pivotRange = sqrt(pivot.x*pivot.x+pivot.y*pivot.y+pivot.z*pivot.z);
			const float maxLinkDist = linkScale*pivotRange;

			/* The scan stops as soon as one close neighbor has been found. */
			int linked = 0;
			for (int dRow=-1; (dRow<=1)&&(linked==0); dRow++)
			{
				const int nRow = gRow+dRow;
				if ((nRow<0)||(nRow>=searchSize))
				{
					continue;
				}
				for (int dCol=-1; (dCol<=1)&&(linked==0); dCol++)
				{
					const int nCol = gCol+dCol;
					if (((dRow==0)&&(dCol==0))||(nCol<0)||(nCol>=searchSize))
					{
						continue;
					}
					const float3 other = grid[nRow*searchSize+nCol];
					if ((other.x==0.f)&&(other.y==0.f)&&(other.z==0.f))
					{
						continue;
					}
					const float3 delta = pivot-other;
					const float gap = sqrt(delta.x*delta.x+delta.y*delta.y+delta.z*delta.z);
					if (gap<maxLinkDist)
					{
						linked = 1;
					}
				}
			}
			if (linked==1)
			{
				eligible[gRow*searchSize+gCol] = 1;
			}
		}
	}

	/* ---- Topology: keep the cells labelled like the central cell ---- */
	identifyDistinctForms(eligible, formIds, searchSize, searchSize);
	const int centerFormId = formIds[centerCell];

	float3 members[searchSurface] = {0};
	int memberCount = 0;
	for (int cell=0; cell<searchSize*searchSize; cell++)
	{
		if (formIds[cell]!=centerFormId)
		{
			continue;
		}
		const float3 candidate = grid[cell];
		if ((candidate.x!=0.f)||(candidate.y!=0.f)||(candidate.z!=0.f))
		{
			members[memberCount] = candidate;
			memberCount++;
		}
	}

	/* More than 80% of the window must have been kept. */
	const int minMemberCount = (int)(0.8f*searchSurface);
	if (memberCount<=minMemberCount)
	{
		return;
	}

	/* ---- Fit, orient, encode, store ---- */
	const float4 fit = getNormalBestFit(members, memberCount);
	float3 normal = fit.xyz;
	if (dot(normal, center)>0.f)
	{
		normal = -normal;
	}

	const uchar8 encoded = motorLocalNormalInfo2uchar(normal.x, normal.y, normal.z, fit.w);
	results[W*py+px] = encoded.s0123;		/* first 4 bytes, row gy */
	results[W*(py+H)+px] = encoded.s4567;	/* last 4 bytes, row gy+H */
}

/*
 * Normal estimation where the output image may have a different width than
 * the depth image.
 *
 * One work-item handles the destination pixel
 * (gx, gy) = (get_global_id(0), get_global_id(1)). Work-items outside the
 * destination image (gx >= destNormalBufferWidth or
 * gy >= destNormalBufferHeight) return immediately, so the kernel can be
 * launched with a global size that is padded beyond the image dimensions.
 *
 * Steps:
 *   1. srcColRow2destColRow_Panoramic() gives a column/row in the depth
 *      image for the destination pixel (NOTE(review): presumably the
 *      matching source pixel -- confirm in that helper).
 *   2. A pivot is searched in the 2x2 block starting at that column/row:
 *      the first texel decoding to a non-null position wins. Without a
 *      pivot nothing is written.
 *   3. A searchSize x searchSize window of decoded positions is gathered
 *      around the pivot (columns wrap, rows outside the image are skipped).
 *   4. Each non-null cell is flagged when one of its 8 window neighbors is
 *      closer than 2 * pixelDimension_1meter * |cell position|; the central
 *      cell is always flagged.
 *   5. identifyDistinctForms() labels the flagged cells; only cells sharing
 *      the central label are kept.
 *   6. When more than 80% of the window was kept, getNormalBestFit() is run,
 *      the result is flipped so that its dot product with the pivot
 *      direction is not positive, encoded with motorLocalNormalInfo2uchar()
 *      and written to destNormalBuffer at rows gy and
 *      gy+destNormalBufferHeight.
 *
 * srcDepthBuffer         encoded depth image (height = width/2)
 * destNormalBuffer       output image; two texels are written per accepted pixel
 * srcDepthBufferWidth    width of the depth image in pixels
 * destNormalBufferWidth  width of the output image in pixels
 */
kernel void computeNormalAdaptive(global const uchar4 *srcDepthBuffer, __global uchar4 *destNormalBuffer, int srcDepthBufferWidth, int destNormalBufferWidth)
{
	// Chord subtended by one source pixel at unit distance (2*pi panorama).
	float theta_1pixel = (float)(2.f*M_PI_F/(float)srcDepthBufferWidth);
	float pixelDimension_1meter = 2.f*tan(0.5f*theta_1pixel);
	int srcDepthBufferHeight = srcDepthBufferWidth/2;
	int destNormalBufferHeight = destNormalBufferWidth/2;

	int gx = get_global_id(0);
	int gy = get_global_id(1);
	
	// Reject work-items outside the destination image. The previous test
	// compared a linear index against width*width and never rejected
	// gx >= width, so padded work-items aliased into the following row.
	if ((gx>=destNormalBufferWidth)||(gy>=destNormalBufferHeight))
	{
		return;
	}
	int2 ColRowInSrcDepthBuffer = srcColRow2destColRow_Panoramic(gx, gy, 0, destNormalBufferWidth, 0, srcDepthBufferWidth);
	
	uchar4 depthInfo = (uchar4)(0);
	float3 localCoords = (float3)(0);
	
	int iPivotColInSrcDepthBuffer = -1;
	int iPivotRowInSrcDepthBuffer = -1;


	// Pivot search: first non-null position in a 2x2 block of source texels.
	int2 pivotSearchAmplitude = (int2)(2, 2);
	int bFoundPivot = 0;
	for (int kx=0; kx<pivotSearchAmplitude.x; kx++)
	{
		for (int ky=0; ky<pivotSearchAmplitude.y; ky++)
		{
			// Columns wrap around the source width; rows past the last one are skipped.
			iPivotColInSrcDepthBuffer = ColRowInSrcDepthBuffer.x+kx;
			if (iPivotColInSrcDepthBuffer>=srcDepthBufferWidth) iPivotColInSrcDepthBuffer-=srcDepthBufferWidth;
			iPivotRowInSrcDepthBuffer = ColRowInSrcDepthBuffer.y+ky;
			if (iPivotRowInSrcDepthBuffer<srcDepthBufferHeight)
			{
				depthInfo = srcDepthBuffer[srcDepthBufferWidth*iPivotRowInSrcDepthBuffer+iPivotColInSrcDepthBuffer];
				localCoords = getDepthBufferLocalCoordinates(depthInfo, iPivotColInSrcDepthBuffer, iPivotRowInSrcDepthBuffer, srcDepthBufferWidth, srcDepthBufferHeight);
				if ((localCoords.x!=0.f)||(localCoords.y!=0.f)||(localCoords.z!=0.f))
				{
					bFoundPivot = 1;
					break;
				}
			}
		}
		// Propagate the inner break.
		if (bFoundPivot==1)
		{
			break;
		}
	}

	if (bFoundPivot==0)
	{
		return;
	}
	
	// Unit direction of the pivot position, used to orient the fitted normal.
	float3 relativeDirection = (float3)(localCoords);
	float norm = sqrt(relativeDirection.x*relativeDirection.x + relativeDirection.y*relativeDirection.y + relativeDirection.z*relativeDirection.z);
	if (norm==0.f)
	{
		return;
	}
	relativeDirection /= norm;
	
	int ic, ir;
	int iCol;
	int iRow;
	uchar4 depthInfoNeigh;
	float3 neighbor;
	
	float3 neighborsGrid[searchSize*searchSize] = {0};
	int neighborsGrid_flags[searchSize*searchSize] = {0};
	int neighborsGrid_topologyIds[searchSize*searchSize] = {0};
	float3 pivot;
	
	int coldum, rowdum;
	float dist;

	// Gather the window around the pivot: grid index = (halfSize+ir)*size + (halfSize+ic).
	for (ic=-searchHalfSize; ic<=searchHalfSize; ic++)
	{
		// Columns wrap around the source width.
		iCol = iPivotColInSrcDepthBuffer+ic;
		if (iCol<0) iCol+=srcDepthBufferWidth;
		if (iCol>=srcDepthBufferWidth) iCol-=srcDepthBufferWidth;
		for (ir=-searchHalfSize; ir<=searchHalfSize; ir++)
		{
			// Rows do not wrap: out-of-range rows leave the cell at zero.
			iRow = iPivotRowInSrcDepthBuffer+ir;
			if ((iRow>=0)&&(iRow<srcDepthBufferHeight))
			{
				depthInfoNeigh = srcDepthBuffer[srcDepthBufferWidth*iRow+iCol];
				neighbor = getDepthBufferLocalCoordinates(depthInfoNeigh, iCol, iRow, srcDepthBufferWidth, srcDepthBufferHeight);
				if ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f))
				{
					neighborsGrid[(searchHalfSize+ir)*searchSize + searchHalfSize+ic] = neighbor;
				}
			}
		}
	}
	
	
	//---------------------------------------------------------
	//---------------------------------------------------------
	// Point eligibility analysis
	//---------------------------------------------------------
	//---------------------------------------------------------
	// The central cell is always eligible.
	neighborsGrid_flags[(searchHalfSize)*searchSize+searchHalfSize] = 1;
	int bFoundSpatialNeighbor;
	float pivotDistToCenter;
	
	for (iRow=0; iRow<searchSize; iRow++)
	{
		for (iCol=0; iCol<searchSize; iCol++)
		{
			pivot = neighborsGrid[iRow*searchSize+iCol];
			
			// Norm of the cell position; it scales the per-pixel footprint below.
			pivotDistToCenter = sqrt(pivot.x*pivot.x+pivot.y*pivot.y+pivot.z*pivot.z);

			bFoundSpatialNeighbor = 0;
			if ((pivot.x!=0.f)||(pivot.y!=0.f)||(pivot.z!=0.f))
			{
				for (ir=-1; ir<=1; ir++)
				{
					rowdum = iRow+ir;
					if ((rowdum>=0)&&(rowdum<searchSize))
					{
						for (ic=-1; ic<=1; ic++)
						{
							// Skip the cell itself.
							if ((ir!=0)||(ic!=0))
							{
								coldum = iCol+ic;
								if ((coldum>=0)&&(coldum<searchSize))
								{
									neighbor = neighborsGrid[rowdum*searchSize+coldum];
									if ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f))
									{
										dist = sqrt((pivot.x-neighbor.x)*(pivot.x-neighbor.x)+(pivot.y-neighbor.y)*(pivot.y-neighbor.y)+(pivot.z-neighbor.z)*(pivot.z-neighbor.z));
										// Close enough: within twice the one-pixel footprint at the cell's distance.
										if (dist<2.f*pixelDimension_1meter*pivotDistToCenter)
										{
											neighborsGrid_flags[iRow*searchSize+iCol] = 1;
											bFoundSpatialNeighbor = 1;
											break;
										}
									}
								}
							}
						}
					}
					// Propagate the inner break: one close neighbor is enough.
					if (bFoundSpatialNeighbor==1)
					{
						break;
					}
				}
			}
		}
	}
	
	
	//---------------------------------------------------------
	//---------------------------------------------------------
	// Topological analysis of the eligible points
	//---------------------------------------------------------
	//---------------------------------------------------------
	identifyDistinctForms(neighborsGrid_flags, neighborsGrid_topologyIds, searchSize, searchSize);
	int centricFormId = neighborsGrid_topologyIds[(searchHalfSize)*searchSize+searchHalfSize];
	
	// At least 80% of the window must belong to the central form.
	int nbNeighborsMin = (int)(0.8f*searchSurface);

	float3 neighbors[searchSurface] = {0};
	int nbNeighbors = 0;
	for (int i=0; i<searchSize*searchSize; i++)
	{
		if (neighborsGrid_topologyIds[i]==centricFormId)
		{
		 	neighbor = neighborsGrid[i];
		 	if ((neighbor.x!=0.f)||(neighbor.y!=0.f)||(neighbor.z!=0.f))
		 	{
		 		neighbors[nbNeighbors].xyz = neighbor.xyz;
		 		nbNeighbors++;
		 	}
		}
	}

	if (nbNeighbors>nbNeighborsMin)
	{
		float4 bestNormal = getNormalBestFit(neighbors, nbNeighbors);
		float3 localN = bestNormal.xyz;
		
		// Orient the normal so that its dot product with the pivot direction is not positive.
		if (dot(localN, relativeDirection)>0.f)
		{
			localN.x = -localN.x;
			localN.y = -localN.y;
			localN.z = -localN.z;
		}

		// The encoded normal spans 8 bytes: 4 go to row gy, 4 to row gy+destNormalBufferHeight.
		uchar8 normalInfo = motorLocalNormalInfo2uchar(localN.x, localN.y, localN.z, bestNormal.w);
	
  		destNormalBuffer[destNormalBufferWidth*gy+gx].x = normalInfo.s0; 		//BLUE
  		destNormalBuffer[destNormalBufferWidth*gy+gx].y = normalInfo.s1; 		//GREEN
  		destNormalBuffer[destNormalBufferWidth*gy+gx].z = normalInfo.s2; 		//RED
  		destNormalBuffer[destNormalBufferWidth*gy+gx].w = normalInfo.s3; 		//ALPHA
  		
  		destNormalBuffer[destNormalBufferWidth*(gy+destNormalBufferHeight)+gx].x = normalInfo.s4; 	//BLUE
  		destNormalBuffer[destNormalBufferWidth*(gy+destNormalBufferHeight)+gx].y = normalInfo.s5; 	//GREEN
  		destNormalBuffer[destNormalBufferWidth*(gy+destNormalBufferHeight)+gx].z = normalInfo.s6; 	//RED
  		destNormalBuffer[destNormalBufferWidth*(gy+destNormalBufferHeight)+gx].w = normalInfo.s7; 	//ALPHA
	}
	
}
}



}

}

