/*This can be a header file included in OpenCL programs.
* Some notes about OpenCL:
* Many GPUs (Quadro included) use little-endian byte ordering, and Java uses big-endian by default. Luckily the
* Jogamp library already converts the CLBuffers whenever necessary; however, sending custom raw byte data requires
* paying attention to this.
*/
//Editor-only shim. This branch should not be taken during an actual OpenCL compile; it is used only to trick
//Eclipse into syntax highlighting this file as "C" code. The OpenCL qualifiers are defined away to nothing and the
//vector types are mapped onto scalar types, so this is for highlighting only, not for building the file as C.
#ifndef __OPENCL_VERSION__
#define __kernel
#define __global
#define kernel
#define global
#define constant
#define local
#define float2 float
#define float3 float
#define float4 float
#define uchar4 char
#endif

#ifndef M_PI //Some older (Intel) compilers are actually missing this!
#define M_PI 3.141592653f
#endif
//Large finite stand-ins for +/- infinity.
constant float infinity = 1.0E14f;//Using the built-in INFINITY might cause some incompatibilities on older hardware, like -INFINITY or fabs(INFINITY).
constant float infinity_neg = -1.0E14f;//This value is enough for us. No need to take any value smaller than that. And we are sure this will not crash.
constant float mathEpsilon = 1.0E-12f;//Could use built-in OpenCL FLT_MIN, but is it always compatible?
constant float epsilon = 0.0001f;
constant float tolerance = 0.0001f;




//Offsets (dx,dy) of the 4 neighbour cells inspected by the identifyDistinctForms* functions.
//The X and Y arrays are paired by index: (-1,-1), (-1,0), (-1,1), (0,-1).
constant int previousCellOffsetSize = 4;
constant int previousCellOffsetX[4] = { -1, -1, -1,  0 };
constant int previousCellOffsetY[4] = { -1,  0,  1, -1 };

//First form id handed out by identifyDistinctFormsInGrid_max254Forms (0 = empty cell, 1 = filled but unlabelled cell).
#define TOPOLOGY_FORM_START_ID 2


#define MTX_CHUNK_SIZE 33 //two 4x4 matrices (inv then forward) and 1 scale factor: 16 + 16 + 1 values
/**
* These are parameters for drawing the demolition grid-lines.
* !!!WARNING!!! Must match the order written by Java's OCLDrawParams class and RvDemoManager.OCLDrawParams.serialize()
* Field order as declared: 4 single-byte fields, 2 floats, 2 uchar4 colors, 2 floats.
* Do not reorder, insert or remove fields without updating the Java side.
*/
struct stDrawParams {
	char	m_iPattern;		//0=none (solid), 1=gridlines, 2=checkered, 3=fog
	char	m_iEnhanceLevel;//0/1 determines how much averaging to do in tournament
	char	m_iNOTHING1;	//empty placeholder, was m_iMaxDistance
	char	m_iNOTHING2;	//empty placeholder
	float 	m_fLineSpacing;	//Spacing between lines (or width of checkers)
	float	m_fLineWidth;	//Width of gridline (if applicable)
	uchar4	m_iColorLight;	//1st color in pattern
	uchar4	m_iColorDark;	//2nd color in pattern
	float	m_fAlign; 		//The scan angle alignment score between -1 and 1.
	float	m_fBrush; 		//The brush size scale (1.0 is normal)
};

/**
 * Stores the alpha channel values that should be used for writing depth buffer point qualities.
 * We'll be sending this from Java in 8-bit uchar chunks in the same order as they are listed here!
 * !!!WARNING!!! The ordinals of the Java enum must be in the same ORDER and QUANTITY as specified here!
 * Otherwise direct memory mapping will be offset!
 * The struct holds 7 single-byte fields; the trailing comment on each field is the matching enum ordinal.
 * @see RvDepthTool.DepthBufferPointQuality
 */
struct stDBPointQuality {
	uchar	STANDARD;	//0 ordinal
	uchar 	NOPOINT;	//1
	uchar	SKIM;		//2
	uchar	UNDEFINED;	//3
	uchar	CLEANED;	//4
	uchar	FOREIGN;	//5
	uchar	PSEUDO;		//6
};

/**As a convention we've chosen the "float" type for the buffer since it will be the most common volume param type, and because trying to
* access 32-bit values which are NOT aligned on 4-byte boundaries (byte indices not evenly divisible by 4) crashes on most/all GPUs!
* Unaligned memory access is not allowed on GPUs because of the performance decrease.
* These functions do a memory cast into the desired type, reading memory as little-endian (most likely; it depends on the hardware).
* NOTE that Java writes big-endian, but jogamp converts it automatically for us.
*/
//Read one int from an int buffer at index *p, then advance *p by one element.
int readIntI(uint *p, constant int *buffer) {
	const uint idx = *p;	//slot to read
	*p = idx + 1;			//move the caller's cursor past it
	return buffer[idx];
}
//Read one int from a float buffer: the 32 bits at index *p are reinterpreted (not converted) as an int,
//then *p is advanced by one element.
int readInt(uint *p, constant float *buffer) {
	const uint idx = *p;
	*p = idx + 1;
	constant int *asInt = (constant int*)(&(buffer[idx]));
	return *asInt;
}
//Read one float from the buffer at index *p, then advance *p by one element.
float readFloat(uint *p, constant float *buffer) {
	const uint idx = *p;
	*p = idx + 1;
	return buffer[idx];
}
//This method reads 2 floats individually, ensuring never to misalign memory access.
//Each element is fetched in its own statement: several (*p)++ inside one vector literal are unsequenced,
//so neither the component order nor the final value of *p would be guaranteed.
//On return *p has advanced by 2 and the result is (buffer[p], buffer[p+1]).
float2 readFloat2(uint *p, constant float *buffer) {
	const float x = buffer[(*p)++];
	const float y = buffer[(*p)++];
	return (float2)(x, y);
}
//This method reads 3 ints individually from a float buffer (bits reinterpreted, not converted),
//ensuring never to misalign memory access.
//Each element is fetched in its own statement: several (*p)++ inside one vector literal are unsequenced,
//so neither the component order nor the final value of *p would be guaranteed.
//On return *p has advanced by 3.
int3 readInt3(uint *p, constant float *buffer) {
	constant int *buffI = (constant int*)buffer;
	const int x = buffI[(*p)++];
	const int y = buffI[(*p)++];
	const int z = buffI[(*p)++];
	return (int3)(x, y, z);
}
//This method reads 3 floats individually, ensuring never to misalign memory access.
//Each element is fetched in its own statement: several (*p)++ inside one vector literal are unsequenced,
//so neither the component order nor the final value of *p would be guaranteed.
//On return *p has advanced by 3.
float3 readFloat3(uint *p, constant float *buffer) {
	const float x = buffer[(*p)++];
	const float y = buffer[(*p)++];
	const float z = buffer[(*p)++];
	return (float3)(x, y, z);
}
//This method reads 4 floats individually, ensuring never to misalign memory access.
//Each element is fetched in its own statement: several (*p)++ inside one vector literal are unsequenced,
//so neither the component order nor the final value of *p would be guaranteed.
//On return *p has advanced by 4.
//(A single "constant float4*" load at &buffer[*p] was also tried and seemed to work without misalignment,
//but the element-wise read is kept to be safe.)
float4 readFloat4(uint *p, constant float *buffer) {
	const float x = buffer[(*p)++];
	const float y = buffer[(*p)++];
	const float z = buffer[(*p)++];
	const float w = buffer[(*p)++];
	return (float4)(x, y, z, w);
}
/*Need to enable 64bit on some hardware to support doubles (Intel GPU on Lenovo)
 * double readDouble(uint *p, constant float *buffer) {
	uint p2 = *p;//Copy the current address index
	(*p) += 2;//Skip 8 bytes (1 double)
	return *((constant double*)(&buffer[p2]));
}*/
/**Store the raw 32 bits of an int into slot p of a float buffer (bits reinterpreted, not converted).
 * Must write in 32-bit alignment! "float" is used only so that indexing is forced into 32-bit steps.
 * p is passed by value, so the caller's index is not advanced.
 */
void writeInt(const uint p, global float *buffer, int value) {
	global int *slot = (global int*)(&(buffer[p]));
	*slot = value;
}

//----------------------------------------------------------------------------------------------
/** Extract a single byte channel of an ARGB 32-bit integer. NOTE if you have a uchar4,
 * just use *.s0 thru *.s3 to retrieve each BGRA channel respectively (yes it's opposite).
 * @param origARGB The 32-bit input value to extract a single 8-bit channel from.
 * @param channel 0-3 for ARGB channel respectively (0 = most significant byte).
 * @return The selected 8-bit channel value.
 */
uchar getChannel(int origARGB, uchar channel) {
	const uchar shift = (3-channel)*8; //Bit position of the requested channel's lowest bit
	const uint bits = origARGB; //Work on an unsigned copy so the right shift fills with zeros
	return (bits >> shift) & 0xff; //Bring the channel down to the low byte and drop everything above it
}

/** Overwrite just one 8-bit channel of a 32-bit ARGB value.
 * @param origARGB The original ARGB packed into a uint.
 * @param channel 0-3 for ARGB channel respectively (0 = most significant byte).
 * @param newChannelValue The 0-255 value to write into the desired single channel.
 * @return origARGB with the selected channel replaced and the other three channels untouched.
 */
uint setChannelFast(uint origARGB, uchar channel, uchar newChannelValue) {
	const uchar shift = (3-channel)*8; //Bit position of the channel being replaced
	const uint channelBits = 0xff << shift; //1's over the channel to overwrite, 0's elsewhere
	const uint inserted = newChannelValue << shift; //New value moved into the channel's position
	return (origARGB & ~channelBits) | inserted; //Clear the old channel, then drop in the new one
}

/** uchar4 flavour of setChannelFast.
 * The incoming uchar4 is reinterpreted as a single uint first, otherwise OpenCL seems to apply
 * the operations to all 4 elements separately. The result is reinterpreted back into a uchar4.
 * @see setChannelFast
 */
uchar4 setChannelFastu(uchar4 origARGB, uchar channel, uchar newChannelValue) {
	uint *packedIn = (uint*)&origARGB;
	uint packedOut = setChannelFast(*packedIn, channel, newChannelValue);
	uchar4 *channelsOut = (uchar4*)&packedOut;
	return *channelsOut;
}
//----------------------------------------------------------------------------------------------
//Reinterpret the 4 bytes of a uchar4 as one 32-bit integer (no per-channel conversion is done).
int ARGB2Int(uchar4 in) {
	int *packed = (int*)(&in);
	return *packed;
}
//Reinterpret the 4 bytes of a 32-bit integer as a uchar4 (no shifting is done, the bytes are taken as stored).
uchar4 int2UChar4(int in) {
	uchar4 *channels = (uchar4*)(&in);
	return *channels;
}
//Pack four 8-bit channels into a single 32-bit integer: alpha in bits 24-31, red 16-23, green 8-15, blue 0-7.
int toARGBi(uchar a, uchar r, uchar g, uchar b) {
	int packed = a << 24;
	packed |= r << 16;
	packed |= g << 8;
	packed |= b;
	return packed;
}
//Build a uchar4 from four 8-bit channels. The components are stored in B,G,R,A order,
//i.e. s0 is blue and s3 is alpha.
uchar4 toARGBu(uchar a, uchar r, uchar g, uchar b) {
	uchar4 bgra = (uchar4)(b, g, r, a);
	return bgra;
}
//Pack four 8-bit channels with toARGBi, then reinterpret those 32 bits as a float (bit cast, not a numeric conversion).
float toARGBf(uchar a, uchar r, uchar g, uchar b) {
	int packed = toARGBi(a,r,g,b);
	float *asFloat = (float*)(&packed);
	return *asFloat;
}
//----------------------------------------------------------------------------------------------
//Converts two angles (radians) to a point on the unit sphere:
//(cos(thetaX)*cos(thetaY), sin(thetaX)*cos(thetaY), sin(thetaY)).
float3 sphericalCoordinatesToCartesianCordinatesOnUnitSphere(float thetaX, float thetaY)
{
	const float cosX = (float)cos(thetaX);
	const float sinX = (float)sin(thetaX);
	const float cosY = (float)cos(thetaY);
	const float sinY = (float)sin(thetaY);
	return (float3)(cosX*cosY, sinX*cosY, sinY);
}
//Returns a unit vector in the direction of the pixel located scanorama's float index of (col,row) total dimensions WxH.
float3 getLocalDirection_Panoramic(float col, float row, int W, int H)
{
	float TWO_PI = 2.f*M_PI_F;
	float thetaX = (float)(W-1.f-col)*TWO_PI/(float)W;
	float thetaY = M_PI_F*(0.5f - (float)row/(float)H);
	float3 onUnitSphere = sphericalCoordinatesToCartesianCordinatesOnUnitSphere(thetaX, thetaY);
	float3 localDirection = (float3)(-onUnitSphere.x, onUnitSphere.z, onUnitSphere.y);
	return localDirection;
}
//Decodes the distance stored in a depth buffer pixel.
//The hundreds digit of the red byte (.z) is the most significant part, green (.y) the middle byte and
//blue (.x) the low byte of an integer, which is then divided by 1000
//(presumably millimetres to metres - confirm against the encoder).
float getDepthBufferLocalDistance(const uchar4 depthBufferInfo)
{
	const int red   = convert_int(depthBufferInfo.z);
	const int green = convert_int(depthBufferInfo.y);
	const int blue  = convert_int(depthBufferInfo.x);

	const int hundreds = (int)(red/100); //The two lower decimal digits of red do not contribute to the distance
	const int encoded = hundreds*256*256 + green*256 + blue;
	return (float)encoded/1000.f;
}
/*Returns Cartesian coordinates in meters.
*/
float3 getDepthBufferLocalCoordinates(const uchar4 depthBufferInfo, int iX, int iY, int W, int H)
{
	int compR = convert_int(depthBufferInfo.z);
	int compG = convert_int(depthBufferInfo.y);
	int compB = convert_int(depthBufferInfo.x);

	int R100 = (int)(compR/100);
	float depth = (float)(R100*256*256 + compG*256 + compB)/1000.f;	//Depth in meters
	if (depth == 0.0f) {
		return (float3)(0.0f,0.0f,0.0f); //No data exists at this pixel
	}
	int R10 	= (int)((compR-100*R100)/10.f);
	int R1 		= compR-100*R100-10*R10;

	float cA_tilde = (float)R10/10.f;//Angle offset theta
	float rA_tilde = (float)R1/10.f; //Angle offset phi

	float iHDouble = iX + cA_tilde;
	if (iHDouble>=W) iHDouble = iHDouble-W; //Actual angle index theta
	float iVDouble = iY + rA_tilde; //Actual angle index phi

	float thetaX = 2.f*M_PI_F*(W-1.f-iHDouble)/W; //Convert theta to radians
	float thetaY = -M_PI_F/2.f + M_PI_F*(H-1.f-iVDouble)/H; //Convert phi to radians

	float3 localCoordsReal = sphericalCoordinatesToCartesianCordinatesOnUnitSphere(thetaX, thetaY);
	float xReal = localCoordsReal.x;
	float yReal = localCoordsReal.y;
	float zReal = localCoordsReal.z;

	float3 localCoords = (float3)(-depth*xReal, depth*zReal, depth*yReal); //Real to Motor

	return localCoords;
}
//Component-wise arithmetic mean of the first nbVals entries of values.
//No guard on nbVals: the accumulated sum is divided by nbVals as given.
float4 getMeanValue_float4(const float4* values, const int nbVals)
{
	float4 sum = (float4)(0);
	for (int idx=0; idx<nbVals; idx++)
	{
		sum += values[idx];
	}
	sum /= nbVals;
	return sum;
}
	
//Component-wise arithmetic mean of the first nbVals entries of values.
//No guard on nbVals: the accumulated sum is divided by nbVals as given.
float3 getMeanValue(const float3* values, const int nbVals)
{
	float3 sum = (float3)(0.f, 0.f, 0.f);
	for (int idx=0; idx<nbVals; idx++)
	{
		sum += values[idx];
	}
	sum /= nbVals;
	return sum;
}

//Component-wise mean of the entries of values whose flag in activeValues equals 1.
//Returns (0,0,0) when no entry is active.
float3 getActiveMeanValue(const float3* values, const int* activeValues, const int nbVals)
{
	float3 sum = (float3)(0.f, 0.f, 0.f);
	int activeCount = 0;
	for (int idx=0; idx<nbVals; idx++)
	{
		if (activeValues[idx]!=1)
			continue; //Only flags equal to exactly 1 count as active
		sum += values[idx];
		activeCount++;
	}
	if (activeCount!=0)
		sum /= activeCount;
	return sum;
}
//Fills m (row-major 3x3, 9 floats) with the mean of the outer products of the deviations
//(samplePoints[i] - samplePointsAverage), i.e. a symmetric matrix whose entry (r,c) is the average of
//deviation_r * deviation_c over the nbVals points. Every entry is divided by nbVals as given (no zero guard).
void getOrthogonalRegressionEnergyFunction(const float3* samplePoints, const int nbVals, float3 samplePointsAverage, float* m) 
{
	for (int k=0; k<9; k++) m[k] = 0.f;

	//Accumulate the upper triangle only
	for (int n=0; n<nbVals; n++)
	{
		const float dx = samplePoints[n].x - samplePointsAverage.x;
		const float dy = samplePoints[n].y - samplePointsAverage.y;
		const float dz = samplePoints[n].z - samplePointsAverage.z;
		m[0] += dx*dx;
		m[1] += dx*dy;
		m[2] += dx*dz;
		m[4] += dy*dy;
		m[5] += dy*dz;
		m[8] += dz*dz;
	}

	//Mirror into the lower triangle
	m[3] = m[1];
	m[6] = m[2];
	m[7] = m[5];

	for (int k=0; k<9; k++)
	{
		m[k] /= nbVals;
	}
}
// Symmetric Householder reduction to tridiagonal form, hard-coded for a 3x3 matrix.
// V: row-major 3x3 array (9 floats). On entry the symmetric matrix; overwritten in place with the
//    accumulated transformations.
// d: 3 floats, receives the diagonal of the tridiagonal matrix.
// e: 3 floats, receives the sub-diagonal in e[1..2]; e[0] is set to 0.
// The outputs are in the layout tql2() expects as input.
void tred2 (float* V, float* d, float* e) 
{
	//  This is derived from the Algol procedures tred2 by
   	//  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
   	//  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
   	//  Fortran subroutine in EISPACK.

	// Start from the last row of the matrix.
	for (int j = 0; j < 3; j++) 
	{
		d[j] = V[(3-1)*3+j];
	}

	// Householder reduction to tridiagonal form.
	for (int i = 3-1; i > 0; i--) 
	{
		// Scale to avoid under/overflow.
		float scale = 0.0f;
		float h = 0.0f;
		for (int k = 0; k < i; k++) 
		{
			scale = scale + fabs(d[k]);
		}
		if (scale == 0.0f) 
		{
            // Nothing to eliminate in this row: copy the next row into d and clear row/column i.
            e[i] = d[i-1];
            for (int j = 0; j < i; j++) {
               d[j] = V[(i-1)*3+j];
               V[i*3+j] = 0.0f;
               V[j*3+i] = 0.0f;
            }
         } 
         else 
         {
            // Generate Householder vector.
            for (int k = 0; k < i; k++) 
            {
               d[k] /= scale;
               h += d[k] * d[k];
            }
            float f = d[i-1];
            float g = sqrt(h);
            if (f > 0) 
            {
               g = -g;
            }
            e[i] = scale * g;
            h = h - f * g;
            d[i-1] = f - g;
            for (int j = 0; j < i; j++) 
            {
               e[j] = 0.0f;
            }
   
            // Apply similarity transformation to remaining columns.
   
            for (int j = 0; j < i; j++) 
            {
               f = d[j];
               V[j*3+i] = f;
               g = e[j] + V[j*3+j] * f;
               for (int k = j+1; k <= i-1; k++) 
               {
                  g += V[k*3+j] * d[k];
                  e[k] += V[k*3+j] * f;
               }
               e[j] = g;
            }
            f = 0.0f;
            for (int j = 0; j < i; j++) 
            {
               e[j] /= h;
               f += e[j] * d[j];
            }
            float hh = f / (h + h);
            for (int j = 0; j < i; j++) 
            {
               e[j] -= hh * d[j];
            }
            for (int j = 0; j < i; j++) 
            {
               f = d[j];
               g = e[j];
               for (int k = j; k <= i-1; k++) {
                  V[k*3+j] -= (f * e[k] + g * d[k]);
               }
               d[j] = V[(i-1)*3+j];
               V[i*3+j] = 0.0f;
            }
         }
         // d[i] temporarily holds h; it is read back as d[i+1] in the accumulation loop below.
         d[i] = h;
      }
   
      // Accumulate transformations.
   
      for (int i = 0; i < 3-1; i++) 
      {
         V[(3-1)*3+i] = V[i*3+i];
         V[i*3+i] = 1.0f;
         float h = d[i+1];
         if (h != 0.0f) 
         {
            for (int k = 0; k <= i; k++) 
            {
               d[k] = V[k*3+i+1] / h;
            }
            for (int j = 0; j <= i; j++) 
            {
               float g = 0.0f;
               for (int k = 0; k <= i; k++) 
               {
                  g += V[k*3+i+1] * V[k*3+j];
               }
               for (int k = 0; k <= i; k++) 
               {
                  V[k*3+j] -= g * d[k];
               }
            }
         }
         for (int k = 0; k <= i; k++) 
         {
            V[k*3+i+1] = 0.0f;
         }
      }
      // The last row of V held the diagonal during accumulation: move it to d and finish V.
      for (int j = 0; j < 3; j++) 
      {
         d[j] = V[(3-1)*3+j];
         V[(3-1)*3+j] = 0.0f;
      }
      V[(3-1)*3+3-1] = 1.0f;
      e[0] = 0.0f;
} 

//Computes (a2 + b2)1/2 without destructive underflow or overflow.
float pythag(float a, float b)
{
 	float r;
    if (fabs(a) > fabs(b)) 
    {
		r = b/a;
        r = fabs(a)*sqrt(1.f+r*r);
    } 
    else if (b != 0) 
    {
		r = a/b;
        r = fabs(b)*sqrt(1.f+r*r);
    } 
    else 
    {
		r = 0.0f;
    }
    return r;
} 

//Computes (a^2 + b^2)^(1/2) more quickly?
float pythagCL(float a, float b)
{
    return sqrt(a*a+b*b);
}

// Symmetric tridiagonal QL algorithm, hard-coded for a 3x3 matrix.
// Takes the output of tred2() and finishes the eigen-decomposition in place.
// V: row-major 3x3 array (9 floats). On entry the accumulated transformations from tred2();
//    on exit column k holds the eigenvector belonging to d[k].
// d: 3 floats. On entry the diagonal; on exit the eigenvalues sorted in ascending order.
// e: 3 floats. On entry the sub-diagonal in e[1..2]; used as scratch and zeroed on exit.
// NOTE(review): eps is 2^-52 (double precision machine epsilon) although all data is float, and the
// iteration count is never checked - confirm the convergence loop cannot stall on the target hardware.
void tql2 (float* V, float* d, float* e) 
{
	int n = 3;
	//  This is derived from the Algol procedures tql2, by
	//  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
	//  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
	//  Fortran subroutine in EISPACK.

	// Shift the sub-diagonal down by one so that e[l] couples d[l] and d[l+1].
	for (int i = 1; i < n; i++) 
	{
		e[i-1] = e[i];
	}
	e[n-1] = 0.0f;

	float f = 0.0f;
	float tst1 = 0.0f;
	float eps = (float)pow(2.0f,-52.0f);

	for (int l = 0; l < n; l++) 
	{
		// Find small subdiagonal element
		tst1 = max(tst1,fabs(d[l]) + fabs(e[l]));
		int m = l;
		while (m < n) {
			if (fabs(e[m]) <= eps*tst1) {
				break;
			}
			m++;
		}

		// If m == l, d[l] is an eigenvalue,
		// otherwise, iterate.

		if (m > l) 
		{
			int iter = 0;
			do 
			{
				iter = iter + 1;  // (Could check iteration count here.)

				// Compute implicit shift
				float g = d[l];
				float p = (d[l+1] - g) / (2.0f * e[l]);
				float r = pythag(p,1.0f);
				if (p < 0) {
					r = -r;
				}
				d[l] = e[l] / (p + r);
				d[l+1] = e[l] * (p + r);
				float dl1 = d[l+1];
				float h = g - d[l];
				for (int i = l+2; i < n; i++) 
				{
					d[i] -= h;
				}
				f = f + h;

				// Implicit QL transformation.
				p = d[m];
				float c = 1.0f;
				float c2 = c;
				float c3 = c;
				float el1 = e[l+1];
				float s = 0.0f;
				float s2 = 0.0f;
				for (int i = m-1; i >= l; i--) 
				{
					c3 = c2;
					c2 = c;
					s2 = s;
					g = c * e[i];
					h = c * p;
					r = pythag(p,e[i]);
					e[i+1] = s * r;
					s = e[i] / r;
					c = p / r;
					p = c * d[i] - s * g;
					d[i+1] = h + s * (c * g + s * d[i]);

					// Accumulate transformation.
					for (int k = 0; k < n; k++) 
					{
						h = V[k*3+i+1];
						V[k*3+i+1] = s * V[k*3+i] + c * h;
						V[k*3+i] = c * V[k*3+i] - s * h;
					}
				}
				p = -s * s2 * c3 * el1 * e[l] / dl1;
				e[l] = s * p;
				d[l] = c * p;

				// Check for convergence.
			} 
			while (fabs(e[l]) > eps*tst1);
		}
		d[l] = d[l] + f;
		e[l] = 0.0f;
	}

	// Sort eigenvalues and corresponding vectors (selection sort, ascending).
	for (int i = 0; i < n-1; i++) 
	{
		int k = i;
		float p = d[i];
		for (int j = i+1; j < n; j++) 
		{
			if (d[j] < p) 
			{
				k = j;
				p = d[j];
			}
		}
		if (k != i) 
		{
			d[k] = d[i];
			d[i] = p;
			// Swap columns i and k of V so each eigenvector stays with its eigenvalue.
			for (int j = 0; j < n; j++) 
			{
				p = V[j*3+i]; // row stride is 3; "j*+i" would read V[j*i] and corrupt the swapped column
				V[j*3+i] = V[j*3+k];
				V[j*3+k] = p;
			}
		}
	}
}
//Eigen-decomposition of a symmetric 3x3 matrix.
//A: input matrix, row-major, 9 floats (not modified).
//V: 9 floats, d: 3 floats, e: 3 floats - caller-provided storage, filled by tred2() followed by tql2().
void EigenvalueDecomposition_Symetric3x3(const float* A, float* V, float* d, float* e) 
{
	// Work on a copy so that A is left untouched.
	for (int k = 0; k < 9; k++)
	{
		V[k] = A[k];
	}

	tred2(V, d, e);	// Tridiagonalize.
	tql2(V, d, e);	// Diagonalize.
}

/**
 * Fits a plane normal to a set of points by orthogonal regression.
 * @param points The sample points.
 * @param nbVals Number of entries of points to use. There is no guard for nbVals == 0: the divisions
 *               by nbVals are then 0/0.
 * @return xyz = the normalised eigenvector belonging to the smallest eigenvalue of the deviation matrix
 *         built by getOrthogonalRegressionEnergyFunction; w = the score, i.e. the mean squared deviation
 *         of the points' projections onto that normal from the mean projection.
 */
float4 getNormalBestFit(const float3* points, const int nbVals)
{
	float4 bestNormal = (float4)(0.f,0.f,0.f,infinity);

	//--------------------------------------------------------------
	//Get the average of the sample points
	//--------------------------------------------------------------
	float3 barycenter = getMeanValue(points, nbVals);

	//--------------------------------------------------------------
	//Get the energy function
	//--------------------------------------------------------------
	float energyMatrix[9] = {0};
	getOrthogonalRegressionEnergyFunction(points, nbVals, barycenter, energyMatrix);

	//--------------------------------------------------------------
	//Get the smallest eigenvalue
	//--------------------------------------------------------------
	float eigenvectors[9] = {0};
	float eigenvalues[3] = {0};
	float e[3] = {0};

	EigenvalueDecomposition_Symetric3x3(energyMatrix, eigenvectors, eigenvalues, e);

	//The running minimum must be a float: an int would truncate the eigenvalues toward zero
	//(e.g. -0.5 and 0.5 both become 0) and the comparisons below could then pick the wrong index.
	int indSmallestValue = 0;
	float smallestVal = eigenvalues[0];
	if (eigenvalues[1]<smallestVal) 
	{
		smallestVal = eigenvalues[1];
		indSmallestValue = 1;
	}
	if (eigenvalues[2]<smallestVal) 
	{
		smallestVal = eigenvalues[2];
		indSmallestValue = 2;
	}

	//--------------------------------------------------------------
	//Get the normal (= eigenvector corresponding to smallest eigenvalue)
	//Eigenvectors are stored as columns of the row-major 3x3 array.
	//--------------------------------------------------------------
	bestNormal.x = eigenvectors[0*3+indSmallestValue];
	bestNormal.y = eigenvectors[1*3+indSmallestValue];
	bestNormal.z = eigenvectors[2*3+indSmallestValue];

	float norm = sqrt(bestNormal.x*bestNormal.x + bestNormal.y*bestNormal.y +bestNormal.z*bestNormal.z);
	if (norm!=0.0f) 
	{
		bestNormal.x /= norm;
		bestNormal.y /= norm;
		bestNormal.z /= norm;
	}

	//--------------------------------------------------------------
	//Get the score: first the mean projection onto the normal...
	//--------------------------------------------------------------
	float scalN_mean = 0;
	float scalN;
	int i;
	for (i=0; i<nbVals; i++)
	{
		float3 P = points[i];
		scalN = P.x*bestNormal.x+P.y*bestNormal.y+P.z*bestNormal.z;
		scalN_mean += scalN;
	}
	scalN_mean/=nbVals;

	//...then the mean squared deviation from it.
	float scoreGlobal = 0;
	float d2;
	for (i=0; i<nbVals; i++)
	{
		float3 P = points[i];
		scalN = P.x*bestNormal.x+P.y*bestNormal.y+P.z*bestNormal.z;
		d2 = (scalN - scalN_mean)*(scalN - scalN_mean);
		scoreGlobal += d2;
	}
	scoreGlobal /= nbVals;
	bestNormal.w = scoreGlobal; //(An earlier variant stored smallestVal here instead.)

	return bestNormal;
}
/**
 * Labels the filled cells of a grid so that filled cells touching each other share one id.
 * @param topologyMask Input grid, nbCellsX*nbCellsY cells stored as [iy*nbCellsX+ix]; a cell is filled when its value is 1.
 * @param topologyIds Output grid of the same size: 0 for non-filled cells, otherwise the form id (ids start at 1).
 * The grid is scanned with ix in the outer loop and iy in the inner loop. For each filled cell the four
 * neighbours given by previousCellOffsetX/Y are inspected. If none is filled a new id is created.
 * Otherwise the cell takes the first neighbour id found and every other neighbour id is rewritten to it
 * across the whole grid (ids are therefore not necessarily consecutive at the end).
 */
void identifyDistinctForms(const int* topologyMask, int* topologyIds, const int nbCellsX, const int nbCellsY)
{
	//Start with every cell unlabelled
	for (int row=0; row<nbCellsY; row++)
	{
		for (int col=0; col<nbCellsX; col++)
		{
			topologyIds[row*nbCellsX+col] = 0;
		}
	}

	int lastLabel = 0; //Most recently created id

	for (int ix=0; ix<nbCellsX; ix++)
	{
		for (int iy=0; iy<nbCellsY; iy++)
		{
			const int cell = iy*nbCellsX+ix;
			if (topologyMask[cell]!=1)
				continue;

			//Collect the distinct ids carried by the filled neighbours
			int neighbourLabels[4] = { 0, 0, 0, 0 };
			int labelCount = 0;
			for (int n=0; n<4; n++)
			{
				const int nx = ix+previousCellOffsetX[n];
				const int ny = iy+previousCellOffsetY[n];
				if ((nx<0)||(nx>=nbCellsX)||(ny<0)||(ny>=nbCellsY))
					continue; //Neighbour lies outside the grid
				const int neighbour = ny*nbCellsX + nx;
				if (topologyMask[neighbour]!=1)
					continue;

				const int label = topologyIds[neighbour];
				int alreadyKnown = 0;
				for (int k=0; k<labelCount; k++)
				{
					if (neighbourLabels[k]==label)
					{
						alreadyKnown = 1;
						break;
					}
				}
				if (alreadyKnown==0)
				{
					neighbourLabels[labelCount] = label;
					labelCount++;
				}
			}

			int cellLabel;
			if (labelCount==0)
			{
				//Isolated so far: open a new form
				lastLabel++;
				cellLabel = lastLabel;
			}
			else
			{
				//Join the first form found and fold every other touching form into it
				cellLabel = neighbourLabels[0];
				for (int k=1; k<labelCount; k++)
				{
					const int absorbed = neighbourLabels[k];
					for (int row=0; row<nbCellsY; row++)
					{
						for (int col=0; col<nbCellsX; col++)
						{
							if (topologyIds[row*nbCellsX+col]==absorbed)
							{
								topologyIds[row*nbCellsX+col] = cellLabel;
							}
						}
					}
				}
			}
			topologyIds[cell] = cellLabel;
		}
	}
}
/**
 * In-place variant of identifyDistinctForms working on a single uchar grid.
 * @param topologyMask Grid of nbCellsX*nbCellsY cells stored as [iy*nbCellsX+ix]. On entry 0 = empty and 1 = filled;
 *        every filled cell reached by the scan is overwritten with its form id (>= TOPOLOGY_FORM_START_ID).
 * @return The number of form ids that were created. This counter is not decreased when two forms are
 *         later merged, so it can exceed the number of distinct ids left in the grid.
 *         Returns false (0) as soon as a new id would be needed while the id counter is at 253; the grid is
 *         then left partially labelled, and this cannot be told apart from "no forms found".
 * NOTE(review): with the check at 253, the ids actually handed out are 2..252 - confirm this matches the
 * "max254Forms" in the name.
 */
int identifyDistinctFormsInGrid_max254Forms(uchar* topologyMask, const int nbCellsX, const int nbCellsY)
{
	uchar foundExistingFormIds[4] = { 0 };
		
	uchar formId;
	//We reserve 0 to indicate the cell is empty, 1 the cell is filled
	//We use from 2 to 255 for form identification
	uchar formCtr = TOPOLOGY_FORM_START_ID;

	int nbFormsFound = 0;
	
	int x, y;
	bool bFound = false;
	
	uchar topoMaskVal;
	
	//Scan with ix outermost, so the neighbours at the previousCellOffset positions have already been visited
	for (int ix=0; ix<nbCellsX; ix++)
	{
		for (int iy=0; iy<nbCellsY; iy++)
		{
			int ind = iy*nbCellsX+ix;
			if (topologyMask[ind] == 1)
			{
				//Collect the distinct form ids carried by the already labelled neighbours
				for (int i=0; i<4; i++)
					foundExistingFormIds[i] = 0;
				int nbFoundExistingForms = 0;
				int foundExistingFormCtr = 0;
				for (int i=0; i<4; i++)
				{
					x = ix+previousCellOffsetX[i];
					y = iy+previousCellOffsetY[i];
					if ((x>=0)&&(x<nbCellsX)&&(y>=0)&&(y<nbCellsY))
					{
						topoMaskVal = topologyMask[y*nbCellsX + x];
						if (topoMaskVal > 1) //Values above 1 are form ids
						{
							bFound = false;
							int currFormId = topoMaskVal;
							for (int k=0; k<foundExistingFormCtr; k++)
							{
								if (foundExistingFormIds[k]==currFormId)
								{
									bFound = true;
									break;
								}
							}
							if (!bFound)
							{
								foundExistingFormIds[foundExistingFormCtr] = currFormId;
								foundExistingFormCtr++;
								nbFoundExistingForms++;
							}
						}	
					}
				}
				if (nbFoundExistingForms>0)
				{
					//Join the first form found; rewrite every other touching form to it across the whole grid
					formId = foundExistingFormIds[0];
					if (nbFoundExistingForms>1)
					{
						for (int k=1; k<nbFoundExistingForms; k++)
						{
							int id = foundExistingFormIds[k];
							for (int ix2=0; ix2<nbCellsX; ix2++)
							{
								for (int iy2=0; iy2<nbCellsY; iy2++)
								{
									if (topologyMask[iy2*nbCellsX+ix2]==id) 
										topologyMask[iy2*nbCellsX+ix2] = formId;
								}
							}
						}
					}
				}
				else
				{
					if (formCtr == 253)
					{
						//We are not allowed to create a new form, we exceeded our capacity
						return false;
					}
					//No labelled neighbour: open a new form
					formId = formCtr;
					formCtr++;
					nbFormsFound++;
				}
				topologyMask[ind] = formId;
			}
		}
	}
	return nbFormsFound;
}
/**
 * Computes the two spherical angles of point P = (xReal,yReal,zReal) as seen from centre C.
 * @return (thetaX, thetaY) in radians. thetaX is in [0, 2*PI) and is measured from the components of
 *         the part of CP orthogonal to the vertical; thetaY is in [-PI/2, PI/2].
 *         Returns thetaX = 0 when P lies on the vertical through C, and (0, 0) when P coincides with C.
 * The axisX* / axisY* parameters are accepted but not used. thetaX is derived directly from the x and y
 * components of CQ, and thetaY from the z component of the vertical projection.
 * NOTE(review): the result is therefore only meaningful for the frame the visible callers pass in
 * (axisX = (1,0,0), axisY = (0,1,0), vertical = (0,0,1)) - confirm before using another frame.
 * The asin() arguments are clamped to [-1, 1]: the ratios can exceed 1 by a rounding error
 * (division and sqrt are not required to be correctly rounded on every device), which would give NaN.
 */
float2 getSphericalCoordinates(	float xReal, float yReal, float zReal,
								float CxReal, float CyReal, float CzReal, 
								float axisXxReal, float axisXyReal, float axisXzReal, 
								float axisYxReal, float axisYyReal, float axisYzReal, 
								float verticalxReal, float verticalyReal, float verticalzReal)
{
	//CP = P - C
	float CPx = xReal-CxReal;
	float CPy = yReal-CyReal;
	float CPz = zReal-CzReal;
	//QP = projection of CP onto the vertical, CQ = the remaining part orthogonal to the vertical
	float scal = CPx*verticalxReal+CPy*verticalyReal+CPz*verticalzReal;
	float QPx = scal*verticalxReal;
	float QPy = scal*verticalyReal;
	float QPz = scal*verticalzReal;
	float CQx = CPx-QPx;
	float CQy = CPy-QPy;
	float CQz = CPz-QPz;
	float normCQ = sqrt(CQx*CQx+CQy*CQy+CQz*CQz);
	float normCP = sqrt(CPx*CPx+CPy*CPy+CPz*CPz);

	float thetaX = 0;
	if (normCQ!=0)
	{
		float sinThetaX = CQy/normCQ;
		if (sinThetaX>1.f) 			sinThetaX = 1.f;
		else if (sinThetaX<-1.f) 	sinThetaX = -1.f;
		thetaX = asin(sinThetaX);
		//asin only covers [-PI/2, PI/2]: use the sign of CQx to unfold onto the full turn
		if (CQx>0)
		{
			if (thetaX<0) thetaX = 2.f*M_PI_F + thetaX;
		}
		else if (CQx<0)
		{
			thetaX = M_PI_F - thetaX;
		}
		else
		{
			if (CQy>=0) 	thetaX = 0.5f*M_PI_F;
			else 			thetaX = 3.f*M_PI_F/2.f;
		}
	}

	float thetaY = 0;
	if (normCP!=0) //P == C would otherwise evaluate asin(0/0)
	{
		float sinThetaY = QPz/normCP;
		if (sinThetaY>1.f) 			sinThetaY = 1.f;
		else if (sinThetaY<-1.f) 	sinThetaY = -1.f;
		thetaY = asin(sinThetaY);
	}

	float2 sphericalCoords = (float2)(thetaX, thetaY);
	return sphericalCoords;
}

/**Convert a point from "real" to "motor" coordinates: x is negated and the y and z components are swapped.
 * */
float3 realCoordinates2MotorCoordinates(float3 pointReal) {
	float3 pointMotor = (float3)(-pointReal.x, pointReal.z, pointReal.y);
	return pointMotor;
}

//Converts pointReal to motor coordinates (see realCoordinates2MotorCoordinates) and then subtracts pointOrigin,
//which is used as given (it is not converted).
float3 realCoordinates2MotorCoordinatesOrigin(float3 pointReal, float3 pointOrigin)
{
	float3 pointMotor = realCoordinates2MotorCoordinates(pointReal);
	return pointMotor - pointOrigin;
}

/**
 * @param xMotorLocal
 * @param yMotorLocal
 * @param zMotorLocal
 * @param W,H
 * @return (-1,-1) if the point was too close, otherwise the index in the image
 */
float2 getDepthBufferCoordinates(float xMotorLocal, float yMotorLocal, float zMotorLocal, int W, int H)
{
	float distanceMetres = sqrt(xMotorLocal*xMotorLocal+yMotorLocal*yMotorLocal+zMotorLocal*zMotorLocal);
	if (distanceMetres < 0.00001f)
		return (float2)(-1.f, -1.f);

	float nx = xMotorLocal/distanceMetres;
	float ny = yMotorLocal/distanceMetres;
	float nz = zMotorLocal/distanceMetres;
	
	float2 sphericalCoord = getSphericalCoordinates(-nx, nz, ny, 0, 0, 0, 1.f, 0, 0, 0, 1.f, 0, 0, 0, 1.f);
	float theta = sphericalCoord.x;
	float phi = sphericalCoord.y;
	float PI = M_PI_F;
	float TWO_PI = 2.f*PI;
	float PIOVER2 = 0.5f*PI;
	float thetaNormalized = fmod(theta, TWO_PI);
	if (thetaNormalized < 0)
		thetaNormalized += TWO_PI;

	float2 areaIndexDbl = (float2)(0);
	areaIndexDbl.x = thetaNormalized*(float)W/TWO_PI;
	areaIndexDbl.y = (PIOVER2+phi)*(float)H/PI;
		
	float2 depthBufferCoords = (float2)(W-1.f-areaIndexDbl.x, max(H-1.f-areaIndexDbl.y, 0.f));
	return depthBufferCoords;
}

/**
 * Encodes a motor-local point into a depth buffer pixel value for a WxH buffer.
 * Layout (B,G,R,A = s0..s3): the distance in millimetres is split into three base-256 digits; blue = lowest
 * digit, green = middle digit, and red = highDigit*100 + columnTenths*10 + rowTenths, where the tenths are the
 * first decimal of the sub-pixel position inside the pixel. Alpha carries the given quality value.
 * @return The encoded pixel, or all zeros when the distance needs a high digit above 1 (so red would not
 *         fit its layout) or when the row is outside [0,H) (which includes the "too close" (-1,-1) answer
 *         of getDepthBufferCoordinates).
 */
uchar4 motorLocalPoint2uchar(float xMotorLocal, float yMotorLocal, float zMotorLocal, int W, int H, uchar alpha)
{
	float distanceMetres = sqrt(xMotorLocal*xMotorLocal+yMotorLocal*yMotorLocal+zMotorLocal*zMotorLocal);
	int distanceMilli = (int)(1000*distanceMetres);
	int d256x256 	  = (int)(distanceMilli/(256*256));
	int d256 		  = (int)((distanceMilli-256*256*d256x256)/256);
	int d1 			  = distanceMilli-256*256*d256x256-256*d256;
	uchar4 MotorLocalPointInfo = (uchar4)(0);

	if (d256x256 > 1)
		return MotorLocalPointInfo;

	float2 deptBufferCoordsDouble = getDepthBufferCoordinates(xMotorLocal, yMotorLocal, zMotorLocal, W, H);
	float cA = deptBufferCoordsDouble.x;
	float rA = deptBufferCoordsDouble.y;

	//Use floor(), not an (int) cast: the column can fall in (-1,0) next to the wrap-around seam, where a
	//cast rounds toward zero and would leave a NEGATIVE fraction that corrupts the red byte.
	float colFloor = floor(cA);
	float rowFloor = floor(rA);
	int iRow = (int)rowFloor;

	if ( (iRow>=0) && (iRow<H) )
	{
		float cA_tilde = cA-colFloor;	//Sub-pixel offsets, in [0,1]
		float rA_tilde = rA-rowFloor;

		int cA_tilde10 = (int)(10*cA_tilde);
		int rA_tilde10 = (int)(10*rA_tilde);
		//The subtraction can round up to exactly 1.0; keep each tenth a single decimal digit.
		if (cA_tilde10 > 9) cA_tilde10 = 9;
		if (rA_tilde10 > 9) rA_tilde10 = 9;

		int compR = d256x256*100 + cA_tilde10*10 + rA_tilde10;
		int compG = d256;

		MotorLocalPointInfo.s3 = (uchar)(alpha);	//ALPHA quality
		MotorLocalPointInfo.s2 = (uchar)(compR);	//RED
		MotorLocalPointInfo.s1 = (uchar)(compG);	//GREEN
		MotorLocalPointInfo.s0 = (uchar)(d1);		//BLUE
	}
	return MotorLocalPointInfo;
}

/**
* Pass by reference to get the depthinfo and coordinates.
* Same encoding as motorLocalPoint2uchar: blue/green = the two low base-256 digits of the distance in millimetres,
* red = highDigit*100 + columnTenths*10 + rowTenths, alpha = the given quality value.
* @param MotorLocalPointInfo Receives the encoded pixel. It is only written when the row is inside [0,H).
* @param deptBufferCoordsDouble The i,j coordinate (with float precision) of the input x,y,z coordinate in this buffer of size WxH.
* Returns the distance (in meters) to the local point, or 0 (with neither output written) when the distance
* needs a high digit above 1 and cannot be encoded.
*/
float motorLocalPoint2ucharAndCoord(float xMotorLocal, float yMotorLocal, float zMotorLocal, int W, int H, uchar4 *MotorLocalPointInfo, float2 *deptBufferCoordsDouble, uchar alpha)
{
	float distanceMetres = sqrt(xMotorLocal*xMotorLocal+yMotorLocal*yMotorLocal+zMotorLocal*zMotorLocal);
	int distanceMilli = (int)(1000*distanceMetres);
	int d256x256 	  = (int)(distanceMilli/(256*256));
	int d256 		  = (int)((distanceMilli-256*256*d256x256)/256);
	int d1 			  = distanceMilli-256*256*d256x256-256*d256;

	if (d256x256 > 1)
		return 0;

	*deptBufferCoordsDouble = getDepthBufferCoordinates(xMotorLocal, yMotorLocal, zMotorLocal, W, H);
	float cA = deptBufferCoordsDouble->x;
	float rA = deptBufferCoordsDouble->y;

	//Use floor(), not an (int) cast: the column can fall in (-1,0) next to the wrap-around seam, where a
	//cast rounds toward zero and would leave a NEGATIVE fraction that corrupts the red byte.
	float colFloor = floor(cA);
	float rowFloor = floor(rA);
	int iRow = (int)rowFloor;

	if (iRow >= 0 && iRow < H) {
		float cA_tilde = cA-colFloor;	//Sub-pixel offsets, in [0,1]
		float rA_tilde = rA-rowFloor;

		int cA_tilde10 = (int)(10*cA_tilde);
		int rA_tilde10 = (int)(10*rA_tilde);
		//The subtraction can round up to exactly 1.0; keep each tenth a single decimal digit.
		if (cA_tilde10 > 9) cA_tilde10 = 9;
		if (rA_tilde10 > 9) rA_tilde10 = 9;

		int compR = d256x256*100 + cA_tilde10*10 + rA_tilde10;
		int compG = d256;
		*MotorLocalPointInfo = toARGBu(alpha, (uchar)(compR), (uchar)(compG), (uchar)(d1)); //Sets the alpha channel to the input quality, and RGB to the depth info
	}
	return distanceMetres;
}
/**
* Packs a motor-local normal direction and its score into 8 bytes (two ARGB pixels).
* The direction is converted to spherical angles, expressed in whole milli-degrees and split into
* base-256 digits; the score is stored as a rounded decimal exponent:
*   s0..s2 = theta milli-degrees (lowest digit first), s3 = 255 - round(log10(1/normalScore))
*   s4..s6 = (phi + PI/2) milli-degrees (lowest digit first), s7 = 255
* @param nxMotorLocal,nyMotorLocal,nzMotorLocal The normal vector components.
* @param normalScore Score of the normal; only its rounded log10(1/score) is kept.
* @return The packed bytes. A stored exponent of 0 (s3 == 255) is what uchar2MotorLocalNormalInfo
*         treats as "no normal".
*/
uchar8 motorLocalNormalInfo2uchar(float nxMotorLocal, float nyMotorLocal, float nzMotorLocal, float normalScore)
{
	float scoreLog = round(log10(1.f/normalScore));
	//Clamp in the float domain BEFORE converting: a score above 1 gives a negative exponent, which
	//previously made (255-scoreInt) exceed 255 and wrap around in the uchar; a zero or negative score
	//gives +inf or NaN, which must not reach the int conversion. fmax() returns its other argument
	//when given a NaN, so NaN becomes 0.
	int scoreInt = (int)fmin(fmax(scoreLog, 0.f), 255.f);

	float2 sphericalCoord = getSphericalCoordinates(-nxMotorLocal, nzMotorLocal, nyMotorLocal, 0, 0, 0, 1.f, 0, 0, 0, 1.f, 0, 0, 0, 1.f);
	float theta_0_2PI = sphericalCoord.x;
	float phi_minusHalfPI_halfPI = sphericalCoord.y;
	float phi_0_PI = phi_minusHalfPI_halfPI+0.5f*M_PI_F; //Shift so the stored value is non-negative

	int thetaIntMilliDeg = (int)(1000*theta_0_2PI*180.f/M_PI_F);
	int phiIntMilliDeg   = (int)(1000*phi_0_PI*180.f/M_PI_F);

	//Base-256 digits of each angle
	int theta256x256 = thetaIntMilliDeg/(256*256);
	int theta256 	 = (thetaIntMilliDeg-256*256*theta256x256)/256;
	int theta1 		 = thetaIntMilliDeg-256*theta256-256*256*theta256x256;

	int phi256x256 	 = phiIntMilliDeg/(256*256);
	int phi256 	     = (phiIntMilliDeg-256*256*phi256x256)/256;
	int phi1 		 = phiIntMilliDeg-256*phi256-256*256*phi256x256;

	uchar8 MotorLocalNormalInfo = (uchar8)(0);

	MotorLocalNormalInfo.s3 = (uchar)(255-scoreInt);
	MotorLocalNormalInfo.s2 = (uchar)(theta256x256);
	MotorLocalNormalInfo.s1 = (uchar)(theta256);
	MotorLocalNormalInfo.s0 = (uchar)(theta1);

	MotorLocalNormalInfo.s7 = (uchar)(255);
	MotorLocalNormalInfo.s6 = (uchar)(phi256x256);
	MotorLocalNormalInfo.s5 = (uchar)(phi256);
	MotorLocalNormalInfo.s4 = (uchar)(phi1);

	return MotorLocalNormalInfo;
}

/**
* Unpacks the 8 bytes written by motorLocalNormalInfo2uchar.
* @param MotorLocalNormalInfo s0..s2 = theta milli-degrees (base-256, lowest digit first), s3 = 255 - score
*        exponent, s4..s6 = shifted phi milli-degrees (base-256, lowest digit first). s7 is not read.
* @return xyz = the normal direction, w = the score exponent (255 - s3). All zeros when the exponent is 0.
*/
float4 uchar2MotorLocalNormalInfo(uchar8 MotorLocalNormalInfo)
{
	const int scoreExponent = 255-convert_int(MotorLocalNormalInfo.s3);
	if (scoreExponent==0)
		return (float4)(0); //Exponent 0 means nothing to decode

	//Reassemble the two angles from their base-256 digits
	const int thetaMilliDeg = convert_int(MotorLocalNormalInfo.s2)*256*256
							+ convert_int(MotorLocalNormalInfo.s1)*256
							+ convert_int(MotorLocalNormalInfo.s0);
	const int phiMilliDeg   = convert_int(MotorLocalNormalInfo.s6)*256*256
							+ convert_int(MotorLocalNormalInfo.s5)*256
							+ convert_int(MotorLocalNormalInfo.s4);

	//Milli-degrees back to radians; phi was stored shifted by +PI/2
	const float theta = thetaMilliDeg*M_PI_F/(float)(180*1000);
	const float phi   = -0.5f*M_PI_F + phiMilliDeg*M_PI_F/(float)(180*1000);

	const float3 unitDir = sphericalCoordinatesToCartesianCordinatesOnUnitSphere(theta, phi);

	//Undo the axis swap/flip that was applied before encoding: (-x, z, y)
	float4 decoded;
	decoded.x = -unitDir.x;
	decoded.y =  unitDir.z;
	decoded.z =  unitDir.y;
	decoded.w =  scoreExponent;
	return decoded;
}
/**
* Fetches and decodes the packed normal stored for pixel (gx,gy).
* The buffer is read at two places: row gy holds theta and the score, row gy+H holds phi.
* @param normals Buffer of packed normals, indexed as W*row+col.
* @param gx,gy Pixel column and row. No bounds checking is done here.
* @param W,H Width used for the row stride, and the row offset between the two halves.
* @return The decoded normal, see uchar2MotorLocalNormalInfo.
*/
float4 getNormalInfo(global const uchar4 *normals, int gx, int gy, int W, int H)
{
	const uchar4 thetaAndScore = normals[W*gy+gx];
	const uchar4 phi = normals[W*(gy+H)+gx];

	//Concatenate both pixels; the last byte (s7) stays 0 and is not used by the decoder
	const uchar8 packed = (uchar8)(thetaAndScore.x, thetaAndScore.y, thetaAndScore.z, thetaAndScore.w,
								   phi.x, phi.y, phi.z, (uchar)0);

	return uchar2MotorLocalNormalInfo(packed);
}
/**
* Maps a direction to (col,row) coordinates in a panoramic image that has a horizontal margin.
* The image without its margins is W = imageWidthIncludingMargin-2*imageMargin wide and W/2 high.
* @param relativeDirection Direction to look up.
* @param imageMargin Width of the margin on each side.
* @param imageWidthIncludingMargin Full image width, margins included.
* @return (col,row) with float precision; col includes the margin offset.
*/
float2 getPanoramicImageCoordinates_local(float3 relativeDirection, int imageMargin, int imageWidthIncludingMargin)
{
	const int widthNoMargin = imageWidthIncludingMargin-2*imageMargin;
	const int heightNoMargin = (int)(widthNoMargin/2);
	const float fullTurn = 2.f*M_PI_F;

	const float2 angles = getSphericalCoordinates(-relativeDirection.x, relativeDirection.z, relativeDirection.y, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 1.f);

	//Columns run opposite to the first angle; rows run opposite to the second one
	const float colFraction = 1.f-fmod(angles.x, fullTurn)/fullTurn;
	const float rowFraction = 0.5f-angles.y/M_PI_F;

	return (float2)(imageMargin+colFraction*widthNoMargin, rowFraction*heightNoMargin);
}
/**
* Rescales a pixel position from one panoramic image to another of a different size.
* Both images have horizontal margins; without them each is W wide and W/2 high.
* @param srcCol,srcRow Position in the source image (srcCol includes the source margin).
* @param srcImageMargin,srcWidthIncludingMargin Margin and full width of the source image.
* @param destImageMargin,destWidthIncludingMargin Margin and full width of the destination image.
* @return (col,row) in the destination image, col including the destination margin.
*/
int2 srcColRow2destColRow_Panoramic(int srcCol, int srcRow, int srcImageMargin, int srcWidthIncludingMargin, int destImageMargin, int destWidthIncludingMargin)
{
	const int srcW = srcWidthIncludingMargin-2*srcImageMargin;
	const int srcH = (int)(srcW/2);
	const int destW = destWidthIncludingMargin-2*destImageMargin;
	const int destH = (int)(destW/2);

	//Rows carry no margin: scale, then clamp a rounded-up last row back inside
	int destRow = (int)round((float)srcRow*(float)destH/(float)srcH);
	if (destRow==destH)
		destRow = destH-1;

	//Columns: strip the source margin, scale, and wrap a rounded-up last column around to 0
	int destCol = (int)round((float)(srcCol-srcImageMargin)*(float)destW/(float)srcW);
	if (destCol==destW)
		destCol = 0;

	return (int2)(destCol+destImageMargin, destRow);
}
/*float3 transformPoint(const float16 m, float3 aPoint)
{
	float3 P_transformed = (float3)(0.f, 0.f, 0.f);
	P_transformed.x = m.s0*aPoint.x + m.s1*aPoint.y + m.s2*aPoint.z  + m.s3;
	P_transformed.y = m.s4*aPoint.x + m.s5*aPoint.y + m.s6*aPoint.z  + m.s7;
	P_transformed.z = m.s8*aPoint.x + m.s9*aPoint.y + m.s10*aPoint.z + m.s11;
	return P_transformed;
}*/
/**
 * Applies the rotation/scale part AND the translation column of matrix 'm' to a point.
 * @param m Matrix stored row by row, 4 values per row; only the first 3 rows (m[0]..m[11]) are read.
 * @param p The point to transform.
 * @return A new point transformed by matrix 'm'.
 */
float3 transformPoint_arrayVersion(constant float *m, const float3 p)
{
	return (float3)(m[0]*p.x + m[1]*p.y + m[2 ]*p.z + m[3],
					m[4]*p.x + m[5]*p.y + m[6 ]*p.z + m[7],
					m[8]*p.x + m[9]*p.y + m[10]*p.z + m[11]);
}
/*float3 transformVector(const float16 m, const float3 aVect)
{
	float3 V_transformed = (float3)(0);
	V_transformed.x = m.s0*aVect.x + m.s1*aVect.y + m.s2*aVect.z;
	V_transformed.y = m.s4*aVect.x + m.s5*aVect.y + m.s6*aVect.z;
	V_transformed.z = m.s8*aVect.x + m.s9*aVect.y + m.s10*aVect.z;
	return V_transformed;
}*/

/**
 * Applies only the upper-left 3x3 part of matrix 'm' to a direction vector (the translation
 * entries m[3], m[7], m[11] are ignored).
 * @param m Matrix stored row by row, 4 values per row.
 * @param aVect The vector to transform.
 * @return A new vector transformed by matrix 'm'.
 */
float3 transformVector_arrayVersion(constant float *m, const float3 aVect)
{
	return (float3)(m[0]*aVect.x + m[1]*aVect.y + m[2]*aVect.z,
					m[4]*aVect.x + m[5]*aVect.y + m[6]*aVect.z,
					m[8]*aVect.x + m[9]*aVect.y + m[10]*aVect.z);
}

/**
 * Intersects an infinite line with an infinite plane.
 * @param pointOnPlane Any point lying on the plane.
 * @param planeNormal Normal of the plane.
 * @param pointOnLine Any point lying on the line.
 * @param lineDirection Direction of the line.
 * @return xyz = the intersection point, w = the multiple of lineDirection that reaches it from
 *         pointOnLine (a distance when lineDirection is a unit vector). All zeros when the line
 *         is (nearly) parallel to the plane, i.e. its direction is perpendicular to the normal.
 */
float4 lineIntersectsPlane(float3 pointOnPlane, float3 planeNormal, float3 pointOnLine, float3 lineDirection) {
	const float towardPlane = dot(pointOnPlane - pointOnLine, planeNormal);
	const float alignment = dot(lineDirection, planeNormal);
	if (fabs(alignment) < mathEpsilon) //The line never reaches the plane
		return (float4)(0);
	const float lambda = towardPlane/alignment;
	return (float4)(pointOnLine + lambda * lineDirection, lambda);
}

/**
 * Perspective projection of a point onto a plane: follows the line from the projection centre
 * through P until it meets the plane.
 * @param P The point to project.
 * @param pointOnPlane Any point lying on the plane.
 * @param planeNormal Normal of the plane.
 * @param centreProjection The projection centre.
 * @return The result of lineIntersectsPlane for that line (xyz = projected point, w = distance from
 *         the projection centre), or all zeros when P coincides with the projection centre.
 */
float4 projectionPerspectiveSurPlan(float3 P, float3 pointOnPlane, float3 planeNormal, float3 centreProjection) {
	const float3 toPoint = P - centreProjection;
	const float toPointLen = length(toPoint);
	if (fabs(toPointLen) < mathEpsilon) //No direction can be derived
		return (float4)(0);
	return lineIntersectsPlane(pointOnPlane, planeNormal, centreProjection, toPoint / toPointLen);
}

//This struct is not intended to be used in an array; it only exists to make passing a polygon's header as a function parameter easier.
//It is filled by readPolyHeader(). Note that the member order here is NOT the order in the serialized buffer (nVertices is read first).
struct CPolyHeader {
	float3 O;//Origin of polygon
	float3 U;//x-axis direction
	float3 V;//y-axis direction
	float3 N;//Normal vector direction
	int nVertices; //Number of vertices in polygon
	float uMin; //Lower bound along U: smallest accepted value of dot(P-O, U)
	float vMin; //Lower bound along V: smallest accepted value of dot(P-O, V)
	float uMax; //Upper bound along U: largest accepted value of dot(P-O, U)
	float vMax; //Upper bound along V: largest accepted value of dot(P-O, V)
	constant float *afterHeaderBuffer; //Points to the "volumeParamBuffer" memory address just after the header, which should contain the vertex info.
	constant float *afterDataBuffer; //Points to the "volumeParamBuffer" memory address just after the data, which should point to the NEXT object (so you can skip over the current one or verify its contents length).
};
//Number of 32-bit values making up the header information read from a buffer and stored in CPolyHeader:
//the vertex count, four 3-component vectors (O, U, V, N) and the four u/v bounds.
int getSizePolyHeader() {
	const int vertexCountValues = 1;
	const int vectorValues = 4*3;
	const int boundValues = 4;
	return vertexCountValues + vectorValues + boundValues;
}
//Number of 32-bit values in the data section following the header: each vertex stores a position (3),
//an edge unit direction (3) and an edge length (1).
int getSizePolyData(struct CPolyHeader polyHeader) {
	const int valuesPerVertex = 3 + 3 + 1;
	return polyHeader.nVertices*valuesPerVertex;
}
/*int getSizePolyInfo(struct CPolyHeader polyHeader) {
	return getSizePolyHeader() + getSizePolyData(polyHeader);
}*/

//Reads the polygon header from the start of 'volumeParamBuffer': the vertex count, then O, U, V, N,
//then uMin, vMin, uMax, vMax (17 32-bit values in total). Finally records where the per-vertex data
//begins and where it ends.
struct CPolyHeader readPolyHeader(constant float *volumeParamBuffer) {
	struct CPolyHeader header;
	uint cursor = 0; //Advanced by every read call below, so the order of the reads matters

	header.nVertices = readInt(&cursor, volumeParamBuffer);

	//Each vector is read as 3 individual floats, ensuring never to misalign memory access.
	header.O = readFloat3(&cursor, volumeParamBuffer);
	header.U = readFloat3(&cursor, volumeParamBuffer);
	header.V = readFloat3(&cursor, volumeParamBuffer);
	header.N = readFloat3(&cursor, volumeParamBuffer);

	header.uMin = readFloat(&cursor, volumeParamBuffer);
	header.vMin = readFloat(&cursor, volumeParamBuffer);
	header.uMax = readFloat(&cursor, volumeParamBuffer);
	header.vMax = readFloat(&cursor, volumeParamBuffer);

	//Address just past the header (start of the vertex data), then just past the vertex data
	header.afterHeaderBuffer = volumeParamBuffer + cursor;
	header.afterDataBuffer = header.afterHeaderBuffer + getSizePolyData(header);
	return header;
}

/**Check if a point is contained within an axis-aligned cube.
 * @param point The 3D point to test.
 * @param cubeOrigin The cube's origin (its minimum corner).
 * @param boxWidth The edge length of the cube.
 * @return True if, on every axis, the point lies between cubeOrigin and cubeOrigin+boxWidth (inclusive).
 */
bool containedInCube(float3 point, float3 cubeOrigin, float boxWidth) {
	const float3 offset = point - cubeOrigin;
	if (!(offset.x >= 0.f && offset.x <= boxWidth))
		return false;
	if (!(offset.y >= 0.f && offset.y <= boxWidth))
		return false;
	return (offset.z >= 0.f && offset.z <= boxWidth);
}

/**Check if a point is contained within an axis-aligned 3D box.
 * @param point The 3D point to test.
 * @param boxOrigin The box's origin (its minimum corner).
 * @param boxWidth The box's size along each axis.
 * @return True if, on every axis, the point lies between boxOrigin and boxOrigin+boxWidth (inclusive).
 */
bool containedInBox(float3 point, float3 boxOrigin, float3 boxWidth) {
	const float3 offset = point - boxOrigin;
	if (!(offset.x >= 0.f && offset.x <= boxWidth.x))
		return false;
	if (!(offset.y >= 0.f && offset.y <= boxWidth.y))
		return false;
	return (offset.z >= 0.f && offset.z <= boxWidth.z);
}

/**Check if 2 points are within a per-axis distance of each other, i.e. no origin specified.
 * @param point1,point2 The two points to compare.
 * @param dist Largest separation allowed on each axis.
 * @return False as soon as the separation on any axis exceeds its limit, otherwise true.
 */
bool containedInDist3D(float3 point1, float3 point2, float3 dist) {
	if (fabs(point1.x-point2.x) > dist.x)
		return false;
	if (fabs(point1.y-point2.y) > dist.y)
		return false;
	if (fabs(point1.z-point2.z) > dist.z)
		return false;
	return true;
}
/** Orthogonal projection of a point onto a plane (moves the point along the plane's normal).
 * @param P The point to project.
 * @param pointOnPlane Any point lying on the plane.
 * @param planeNormal Unit normal vector to the plane.
 * @return The projected point.
 */
float3 projectionSurPlan(float3 P, float3 pointOnPlane, float3 planeNormal) {
	const float heightAbovePlane = dot(P - pointOnPlane, planeNormal); //Signed offset of P along the normal
	return P - heightAbovePlane*planeNormal;
}

/**Was named "intersectionDroitePlan". Finds where a line (or half-ray) starting at a point meets a plane.
 * @param pp Any point lying on the plane.
 * @param n Unit normal vector defining the plane.
 * @param eye A point on the line; the start of the ray when bIsHalfRay is true.
 * @param line A unit direction vector emanating from point eye.
 * @param bIsHalfRay If true, an intersection BEHIND eye (negative distance) counts as a miss. If false, a negative fDist can be returned.
 * @param outputPoint Output (may be NULL), the intersection point. (float3)INFINITY on a miss. Equal to eye when eye already lies on the plane.
 * @param fDist Output (may be NULL), scalar distance along the line from eye to the intersection point. INFINITY on a miss.
 * @param bIsOnNormalSide Output (may be NULL), true if eye lies in the positive half-space of the plane (or exactly on it).
 *        Only written when bIsHalfRay is true.
 * @return false if the line didn't intersect the plane (line parallel to the plane, or ray facing the wrong way), otherwise true.
 */
bool intersectPlaneLine(const float3 pp, float3 n, const float3 eye, const float3 line, const bool bIsHalfRay, float3 *outputPoint, float *fDist, bool *bIsOnNormalSide) {
	const float eyeHeight = dot(eye - pp, n); //Signed offset of eye from the plane, along the normal
	if (bIsHalfRay && bIsOnNormalSide != NULL)
		*bIsOnNormalSide = (eyeHeight >= 0.f);

	if (eyeHeight == 0.f) { //eye already lies on the plane: nothing to project
		if (fDist != NULL) *fDist = 0.f;
		if (outputPoint != NULL) *outputPoint = eye;
		return true;
	}

	const float alignment = dot(line, n); //Zero when the line runs parallel to the plane
	const bool bParallel = (alignment == 0.f);
	//Scale to apply to the unit direction to reach the plane; negative means the plane is behind eye
	const float reach = bParallel ? INFINITY : -eyeHeight / alignment;
	const bool bHit = !bParallel && !(bIsHalfRay && reach < 0.f);

	if (!bHit) {
		if (outputPoint != NULL) *outputPoint = (float3)(INFINITY);
		if (fDist != NULL) *fDist = INFINITY;
		return false;
	}
	if (outputPoint != NULL) *outputPoint = eye + reach * line;
	if (fDist != NULL) *fDist = reach;
	return true;
}

/**
* Does a point lie "on" a polygon surface (within a tolerance)?
* First rejects the point if it falls outside the polygon's u/v bounds or is farther than 'tolerance'
* from the polygon's plane (measured along N). Otherwise casts a ray from the (projected) point in the
* +U direction and counts how many polygon edges it crosses: an odd count means inside.
* @param point 3D point to check if it "touches" the single polygon.
* @param polyHeader The struct containing header info about the polygon. Its afterHeaderBuffer must
*        point at nVertices records of (vertex position, edge unit direction, edge length).
* @param bProjected True if the "point" has already been projected, so the step can be skipped
* @return true if the point lies on the polygon.
*/
bool belongsToPolygon(float3 point, const struct CPolyHeader polyHeader, bool bProjected) {
	float3 edgeUnitVect;
	float3 P1;
	uint p = 0; //Streaming address pointer
	float edgeLen;

	//Coordinates of the point in the polygon's own (U, V, N) frame
	float3 OP = point - polyHeader.O;
	float scalU = dot(OP, polyHeader.U);
	float scalV = dot(OP, polyHeader.V);
	float scalN = dot(OP, polyHeader.N);
	if ((scalU < polyHeader.uMin) || (scalU > polyHeader.uMax) || (scalV < polyHeader.vMin) || (scalV > polyHeader.vMax)
			|| (scalN < -tolerance) || (scalN > tolerance)) { //Within the bounding box of the polygon?
		return false; //NOT touching the polygon
	}
	float3 pointProj;
	if (bProjected) //Point is already projected?
		pointProj = point;
	else {
		pointProj = polyHeader.O + scalU*polyHeader.U + scalV*polyHeader.V; //Drop the N component
	}
	int nbIntersections = 0;
	for (int i = 0; i < polyHeader.nVertices; i++) {
		P1				= readFloat3(&p, polyHeader.afterHeaderBuffer); //Vertex location
		edgeUnitVect	= readFloat3(&p, polyHeader.afterHeaderBuffer); //Edge unit direction vector
		edgeLen			= readFloat (&p, polyHeader.afterHeaderBuffer); //Edge length
		if (edgeLen > epsilon) { //Only consider an edge if it is long enough
			float scal = dot(polyHeader.U, edgeUnitVect);
			if (fabs(fabs(scal)-1.f) >= epsilon) { //General case: the edge is NOT parallel to U (parallel edges are skipped)
				float3 crossProd = cross(polyHeader.U, edgeUnitVect);
				float norm = length(crossProd);
				crossProd /= norm;//Normalized normal vector
				float3 w = cross(polyHeader.U, crossProd); //Perpendicular to both U and crossProd
				float3 PP1 = P1 - pointProj;
				float dist = dot(PP1, crossProd); //Offset between the point and the edge along crossProd
				if (fabs(dist) < tolerance) {
					float dotWithW = dot(edgeUnitVect, w);
					//Position along the edge (from P1) where it crosses the line through pointProj with direction U
					float lambda = -(PP1.x * w.x + PP1.y * w.y + PP1.z * w.z)/dotWithW;
					if ((lambda >= 0.0f) && (lambda <= edgeLen)) { //The crossing lies on the edge itself
						float dotWithU = dot(edgeUnitVect, polyHeader.U);
						//Signed position of that crossing along U, measured from pointProj
						float mu = (PP1.x * polyHeader.U.x + PP1.y * polyHeader.U.y + PP1.z * polyHeader.U.z) + lambda * dotWithU;
						if (mu >= 0.0f) //Only count crossings on the +U side of the point
							nbIntersections++;
					}
				}
			}
		}
	}
	return (nbIntersections % 2 != 0); //Contained inside the polygon if number of edge intersections is ODD.
}

/**
* Intersects a line with a polygon.
* @param pointOnLine A point on the line (the "eye").
* @param lineDirection Direction of the line.
* @param polyHeader Header of the polygon to test against.
* @return xyz = the 3D intersection point, w = the distance from the eye along the direction.
*         All zeros when the line is parallel to the polygon's plane or meets the plane outside the polygon.
*/
float4 intersectionDroitePolygone(float3 pointOnLine, float3 lineDirection, const struct CPolyHeader polyHeader) {
	//Where the line meets the polygon's infinite plane, and how far that is from pointOnLine
	const float4 planeHit = lineIntersectsPlane(polyHeader.O, polyHeader.N, pointOnLine, lineDirection);

	//An all-zero result is how lineIntersectsPlane reports a line parallel to the plane
	const bool bAnyNonZero = (planeHit.x!=0.f) || (planeHit.y!=0.f) || (planeHit.z!=0.f) || (planeHit.w!=0.f);
	if (!bAnyNonZero)
		return (float4)(0);

	//Keep the hit only if it lies on the polygon surface (within a tolerance)
	if (!belongsToPolygon(planeHit.xyz, polyHeader, false))
		return (float4)(0.f);
	return planeHit;
}

/** Determine if a point lies within a 2D triangle.
* @param p The point to test.
* @param p0 First vertex of triangle.
* @param p1 Second vertex of triangle.
* @param p2 Third vertex of triangle.
* @return true if point p touches the triangle.
*/
/*bool belongsToTriangle2D(float2 p, float2 p0, float2 p1, float2 p2) {
    float A = 1/2 * (-p1.y * p2.x + p0.y * (-p1.x + p2.x) + p0.x * (p1.y - p2.y) + p1.x * p2.y);
    float sgn = A < 0 ? -1 : 1;
    float s = (p0.y * p2.x - p0.x * p2.y + (p2.y - p0.y) * p.x + (p0.x - p2.x) * p.y) * sgn;
    float t = (p0.x * p1.y - p0.y * p1.x + (p0.y - p1.y) * p.x + (p1.x - p0.x) * p.y) * sgn;

    return ((s > 0) && (t > 0) && (s + t) < (2 * A * sgn));
}*/
/**
* Distance from a point to a 2D line segment.
* @param p The point to test.
* @param a First end of the segment.
* @param b Second end of the segment.
* @return Distance to the nearest point of the segment, 0.f if point p "touches" it.
*         For a zero-length segment, the distance to a.
*/
float distanceToSegment2D(const float2 p, const float2 a, const float2 b) {
	const float2 fromA = p-a;
	const float2 segment = b-a;
	const float segmentLen = length(segment);
	if (segmentLen == 0.f) //(a == b)
		return length(fromA);

	const float2 unit = segment/segmentLen; //Segment's unit direction
	const float along = dot(fromA, unit); //How far p's projection lies along the segment, measured from a
	if (along <= 0.f) //Projection falls before a
		return length(fromA);
	if (along >= segmentLen) //Projection falls beyond b
		return distance(p,b);
	return distance(fromA, along*unit); //Perpendicular offset from the segment
}

/**
* Distance from a point to a 3D line segment.
* @param p The point to test.
* @param a First end of the segment.
* @param b Second end of the segment.
* @return Distance to the nearest point of the segment, 0.f if point p "touches" it.
*         For a zero-length segment, the distance to a.
*/
float distanceToSegment3D(const float3 p, const float3 a, const float3 b) {
	const float3 fromA = p-a;
	const float3 segment = b-a;
	const float segmentLen = length(segment);
	if (segmentLen == 0.f) //(a == b)
		return length(fromA);

	const float3 unit = segment/segmentLen; //Segment's unit direction
	const float along = dot(fromA, unit); //How far p's projection lies along the segment, measured from a
	if (along <= 0.f) //Projection falls before a
		return length(fromA);
	if (along >= segmentLen) //Projection falls beyond b
		return distance(p,b);
	return distance(fromA, along*unit); //Perpendicular offset from the segment
}

/**
* Distance from a point to a 3D polyline serialized in a buffer.
* The stream holds the vertex count followed by that many vertices (3 floats each).
* @param volumeParamBuffer Buffer positioned at the head of the polyline's stream.
* @param point The point to test.
* @return The smallest distance to any of the polyline's segments. With a single vertex, the distance
*         to that vertex; with no vertices, INFINITY.
*/
float distanceToPolyLine3D(constant float *volumeParamBuffer, const float3 point) {
	//unpack the serialized stream:
	uint p = 0;//Assume incoming pointer is already at the head of the volume stream.
	const int numVertices = readInt(&p, volumeParamBuffer);
	if (numVertices <= 0) //Nothing to measure against; do NOT read vertices that are not there
		return INFINITY;

	float3 pointA = readFloat3(&p, volumeParamBuffer);
	if (numVertices == 1) //A lone vertex: reading a second one would run into whatever follows this object
		return length(point - pointA);

	float fDist = INFINITY;
	for (int i = 1; i < numVertices; i++) {
		const float3 pointB = readFloat3(&p, volumeParamBuffer);
		fDist = min(fDist, distanceToSegment3D(point, pointA, pointB));
		pointA = pointB; //The end of this segment is the start of the next one
	}
	return fDist;
}
/**
* Determine the distance of a point to a 3D triangle. Uses the Barycentric coordinates which
* are proportional to the areas of the 3 subtriangles created by point 'p'.
* https://math.stackexchange.com/questions/4322/check-whether-a-point-is-within-a-3d-triangle
* @param p The point to test collision with the triangle.
* @param a First vertex of triangle.
* @param b Second vertex of triangle.
* @param c Third vertex of triangle.
* @param fThreshold Early-out check on the distance to the triangle's plane. Can be quite large.
* @return Distance to the triangle (0.f if point p "touches" it). When the point is at least fThreshold
*         away from the triangle's plane, only that perpendicular distance is returned (lateral offset
*         not included). For a triangle with no area, the distance to vertex a.
*/
float distanceToTriangle3D(const float3 p, const float3 a, const float3 b, const float3 c, const float fThreshold) {
	const float3 ab = b-a;
	const float3 ac = c-a;
	const float3 fromA = p-a;
	const float3 n = cross(ab,ac); //Normal to the triangle's plane (not unit length)
	const float nLenSq = dot(n,n);
	if (nLenSq == 0.f) //Degenerate triangle
		return length(fromA);

	const float nLen = length(n);
	const float planeOffset = dot(fromA,n/nLen); //Signed distance of p from the triangle's plane
	if (fabs(planeOffset) >= fThreshold)
		return fabs(planeOffset);

	//Barycentric coordinates of p's projection onto the triangle's plane
	const float3 onPlane = p - (planeOffset/nLen) * n;
	const float3 onPlaneFromA = onPlane-a;
	const float gamma = dot(cross(ab,onPlaneFromA),n)/nLenSq;
	const float beta  = dot(cross(onPlaneFromA,ac),n)/nLenSq;
	const float alpha = 1.f-gamma-beta;

	//Pick the edge the projection lies beyond; the first negative coordinate decides
	float3 nearCorner, farCorner;
	if (alpha < 0.f) { //edge opposite a
		nearCorner = b;
		farCorner  = c;
	}
	else if (beta < 0.f) { //edge opposite b
		nearCorner = a;
		farCorner  = c;
	}
	else if (gamma < 0.f) { //edge opposite c
		nearCorner = a;
		farCorner  = b;
	}
	else
		return fabs(planeOffset); //Projection is inside the triangle: only the perpendicular distance remains

	//Distance from the (unprojected) point to that edge, clamped to its two corners
	const float3 fromNear = p-nearCorner;
	const float3 edge = farCorner-nearCorner;
	const float edgeLen = length(edge);
	const float3 edgeUnit = edge/edgeLen;
	const float along = dot(fromNear, edgeUnit); //p's component along the edge
	if (along < 0.f)
		return length(fromNear);
	if (along > edgeLen)
		return length(p-farCorner);
	return length(fromNear - edgeUnit*along);
}

/**Does a ray emitted from a point intersect a triangle?
 * @param p The ray's source point.
 * @param line Unit direction vector of the half-ray; must point toward the triangle to hit it.
 * @param a,b,c The triangle's vertices.
 * @param outputPoint Output (may be NULL), the intersection point. Only written when the ray hits the triangle.
 * @param fDist Output, scalar distance along the ray from p to the triangle's PLANE; written by
 *        intersectPlaneLine even when the hit falls outside the triangle's area. Not written for a
 *        triangle with no area.
 * @param bIsOnNormalSide Output, true if p lies on the positive half-space of the triangle's plane,
 *        the normal being cross(b-a, c-a).
 * @return true if the ray hits the triangle.
 */
bool intersectsTriangle3D(const float3 p, const float3 line, const float3 a, const float3 b, const float3 c, float3 *outputPoint, float *fDist, bool *bIsOnNormalSide) {
	const float3 ab = b-a;
	const float3 ac = c-a;
	const float3 n = cross(ab,ac); //Normal to the triangle's plane (not unit length)
	const float nLen = length(n);
	if (nLen == 0.f) //Invalid triangle with NO area
		return false;

	//Where does the half-ray meet the triangle's plane?
	float3 planeHit;
	if (!intersectPlaneLine(a, n/nLen, p, line, true, &planeHit, fDist, bIsOnNormalSide))
		return false;

	//Tolerance on the area ratios, scaled by the triangle's size. It lets neighboring triangles of a mesh
	//overlap slightly so rays do not "leak" between them; the value was tuned empirically.
	const float slack = 5E-7f/nLen;
	const float nLenSq = nLen*nLen;

	//Barycentric coordinates of the plane hit; the three sum to 1
	const float3 hitFromA = planeHit-a;
	const float gamma = dot(cross(ab,hitFromA),n)/nLenSq;
	const float beta  = dot(cross(hitFromA,ac),n)/nLenSq;
	const float alpha = 1.f-gamma-beta;

	const bool bInside = (alpha >= -slack && beta >= -slack && gamma >= -slack);
	if (bInside && outputPoint != NULL)
		*outputPoint = planeHit;
	return bInside;
}
//TODO: Not implemented yet. Placeholder with the same parameter layout as intersectsTriangle3D plus a
//fourth vertex 'd'. It currently always returns false and writes none of its output pointers.
bool intersectsRect3D(const float3 p, const float3 line, const float3 a, const float3 b, const float3 c, const float3 d, float3 *outputPoint, float *fDist, bool *bIsOnNormalSide) {
	return false;
}
/**
 * Intersects a line with an axis-aligned box centred on the origin.
 * @param fHalfX,fHalfH,fHalfZ Half-sizes of the box along x, y and z.
 * @param volLocalEye A point on the line, in the box's local frame.
 * @param volLocalDir Direction of the line, in the box's local frame.
 * @return The [max,min] distances from the eye along direction where the box surface intersected
 *         (negative distances are allowed). (INFINITY,-INFINITY) when no face was hit.
 */
//TODO: Finish this for AABB:
float2 intersectsAABB(float fHalfX, float fHalfH, float fHalfZ, float3 volLocalEye, float3 volLocalDir) {
	const float3 high = (float3)( fHalfX,  fHalfH,  fHalfZ);
	const float3 low  = (float3)(-fHalfX, -fHalfH, -fHalfZ);
	float fFarthest = -INFINITY; //Running max, starts inverted
	float fNearest  =  INFINITY; //Running min, starts inverted

	//Faces 0/1 are perpendicular to x, 2/3 to y, 4/5 to z; even faces sit at 'high', odd ones at 'low'
	for (int face = 0; face < 6; face++) {
		const int axis = face / 2;

		float3 facePoint = low;
		if (face % 2 == 0)
			facePoint = high;

		float3 faceNormal = (float3)(0.f); //Sign of the normal doesn't matter
		if (axis == 0)
			faceNormal.x = 1.f;
		else if (axis == 1)
			faceNormal.y = 1.f;
		else
			faceNormal.z = 1.f;

		float3 hit;
		float fDist;
		if (!intersectPlaneLine(facePoint, faceNormal, volLocalEye, volLocalDir, false, &hit, &fDist, NULL))
			continue; //Line is parallel to this face's plane

		//The plane was hit; is the hit within the face's rectangle?
		bool bOutsideFace;
		if (axis == 0)
			bOutsideFace = (fabs(hit.y) > high.y || fabs(hit.z) > high.z);
		else if (axis == 1)
			bOutsideFace = (fabs(hit.x) > high.x || fabs(hit.z) > high.z);
		else
			bOutsideFace = (fabs(hit.x) > high.x || fabs(hit.y) > high.y);
		if (bOutsideFace)
			continue;

		//A face was hit: update BOTH extremes
		if (fDist > fFarthest)
			fFarthest = fDist;
		if (fDist < fNearest)
			fNearest = fDist;
	}

	if (fFarthest == -INFINITY) //Max never moved: no face was hit
		return (float2)(INFINITY,-INFINITY);
	return (float2)(fFarthest, fNearest);
}
/** Determine the distance of a point to a 3D quad.
* NOTE: Not implemented yet; this placeholder ignores all of its arguments and always returns INFINITY.
* @param p The point to test collision with the quad.
* @param a First vertex of quad.
* @param b Second vertex of quad.
* @param c Third vertex of quad.
* @param d Fourth vertex of quad.
* @param fThreshold Early-out thickness check (intended; currently unused).
* @return Intended: the distance between point p and quad abcd. Currently always INFINITY.
 * */
float distanceToQuad3D(const float3 p, const float3 a, const float3 b, const float3 c, const float3 d, const float fThreshold) {
	return INFINITY; //Return the distance between point p and quad abcd.
}

/**
* Choose between a "line" color and a "gap" color for a point, based on a global axis-aligned Cartesian grid.
* For each axis the point's coordinate is expressed in grid cells and only the fractional position inside
* its cell is kept. The point counts as lying on a grid line as soon as that fraction is NOT greater than
* lineW on at least one axis (so a line occupies the first lineW fraction of every cell).
* @param pointGlobal The 3D point whose color is to be determined.
* @param gridSpacing Space between the gridlines (in meters).
* @param lineW Grid-line thickness as a fraction of the gridSpacing.
* @param colorLight Color to return if the point falls on a grid line.
* @param colorDark Color to return if the point doesn't fall on a grid line.
* @return the color to use for this pixel.
*/
uchar4 getGridColor(float3 pointGlobal, float gridSpacing, float lineW, uchar4 colorLight, uchar4 colorDark) {
	//Coordinates measured in grid cells
	const float cellX = pointGlobal.x/gridSpacing;
	const float cellY = pointGlobal.y/gridSpacing;
	const float cellZ = pointGlobal.z/gridSpacing;
	//Fractional position inside the current cell along each axis
	const float fracX = cellX - floor(cellX);
	const float fracY = cellY - floor(cellY);
	const float fracZ = cellZ - floor(cellZ);

	//On a line if ANY axis is not strictly past the line width (this form also sends NaN to the light color)
	if (!(fracX > lineW) || !(fracY > lineW) || !(fracZ > lineW))
		return colorLight; //draw light line

	return colorDark; //draw dark region between grid lines
}

/**
* Paint a checkered light/dark color depending on where it is in the global Cartesian grid.
* Each axis is divided into cells of width gridSpacing; the parity of the sum of the three integer cell
* indices selects the color (even = dark, odd = light).
* @param pointGlobal The 3D point whose color is to be determined.
* @param gridSpacing Space between the checkers (in meters).
* @param colorLight Color to return if the point falls on a light square.
* @param colorDark Color to return if the point falls on a dark square.
* @return the color to use for this pixel.
*/
uchar4 getCheckerColor(float3 pointGlobal, float gridSpacing, uchar4 colorLight, uchar4 colorDark) {
	//Use floor() before the integer conversion: a bare (int) cast truncates toward zero, which gives the
	//cells [-1,0) and [0,1) the same index 0 and therefore draws a double-width (mirrored) checker across
	//every coordinate plane. For non-negative coordinates the result is unchanged.
	const int cellX = (int)floor(pointGlobal.x/gridSpacing);
	const int cellY = (int)floor(pointGlobal.y/gridSpacing);
	const int cellZ = (int)floor(pointGlobal.z/gridSpacing);
	//Lowest bit gives the parity of the sum for negative sums as well
	if (((cellX + cellY + cellZ) & 1) == 0) {
		return colorDark;//draw dark color
	}
	return colorLight; //draw light color
}

/**
* Paint a diagonal striped light/dark color depending on where it is in the global Cartesian grid.
* The absolute coordinates are wrapped into one period (gridSpacing) and normalized by it, then combined
* into a single diagonal value y - (x + z). That value is sorted into bands that are 0.5 wide, with band
* boundaries at -1.5, -1, -0.5, 0, 0.5, 1 and 1.5, and the bands alternate light/dark.
* @param pointGlobal The 3D point whose color is to be determined.
* @param gridSpacing Distance spanning a pair of stripes (in meters).
* @param colorLight Color to return if the point falls on a light stripe.
* @param colorDark Color to return if the point falls on a dark stripe.
* @return the color to use for this pixel.
*/
uchar4 getStripeColor(float3 pointGlobal, float gridSpacing, uchar4 colorLight, uchar4 colorDark) {
	//Per-axis position inside the current period, as a fraction of the period
	const float3 phase = fmod(fabs(pointGlobal), gridSpacing)/gridSpacing;
	const float diag = phase.y - (phase.x + phase.z);

	//Count how many band boundaries lie at or below the diagonal value (each comparison yields 0 or 1).
	//Nothing passed (including NaN) means the lowest band, which is light; every boundary crossed flips the color.
	int passed = 0;
	passed += (diag >= -1.5f);
	passed += (diag >= -1.f);
	passed += (diag >= -0.5f);
	passed += (diag >= 0);
	passed += (diag >= 0.5f);
	passed += (diag >= 1.0f);
	passed += (diag >= 1.5f);

	if (passed % 2 == 1)
		return colorDark; //draw dark color
	return colorLight; //draw light color
}

/*uchar4 getFogColor(float3 pointGlobal, float distance, uchar4 colorLight, uchar4 colorDark) {
	return toARGBf(255,32,32,32);//colors based on alignment (-1 to 1)
}*/

/**
* Paint a global Cartesian pattern using the colors and sizes specified in the input struct.
* Dispatch on params.m_iPattern as implemented here: 1 = grid lines, 2 = checkers, 3 = diagonal stripes.
* Any other value yields the solid light color.
* @param pointGlobal The 3D point whose color is to be determined.
* @param params The param struct describing how to paint the pixel.
* @return the color to use for this pixel.
*/
uchar4 getDemoColor(float3 pointGlobal, struct stDrawParams params) {
	const int pattern = params.m_iPattern;

	if (pattern == 1) //grid lines
		return getGridColor(pointGlobal, params.m_fLineSpacing, params.m_fLineWidth, params.m_iColorLight, params.m_iColorDark);

	if (pattern == 2) //checkers
		return getCheckerColor(pointGlobal, params.m_fLineSpacing, params.m_iColorLight, params.m_iColorDark);

	if (pattern == 3) //diagonal stripes
		return getStripeColor(pointGlobal, params.m_fLineSpacing, params.m_iColorLight, params.m_iColorDark);

	//No pattern matched: solid color
	return params.m_iColorLight;
}

/*THIS IS A BUILT-IN FUNCTION:
Clamp a value (fVal) between a minimum (fMin) and maximum (fMax) range.
float clamp(float fVal, float fMin, float fMax) {
	return (fVal < fMin ? fMin : (fVal > fMax ? fMax : fVal));
}*/

/**Build an axis-aligned integer step vector from a dimension index and a direction flag.
 * @param dim The desired dimension: 0,1,2 for x,y,z
 * @param dir The desired direction: 0 selects the negative axis direction, any other value the positive one.
 * @param output Receives the step: +/-1 on the chosen axis and 0 on the others. For an unknown dim it
 *               receives (10000,10000,10000), so the result is never all zeros (an all-zero step would be
 *               dangerous when used for stepping inside a loop).
 */
void dimDirToStep(int dim, int dir, int3 *output) {
	const int sign = (dir != 0) ? 1 : -1;
	//Start from the non-zero fallback so an unknown dimension can never produce a zero step
	int3 step = (int3)(10000,10000,10000);
	if (dim == 0)
		step = (int3)(sign,0,0);
	else if (dim == 1)
		step = (int3)(0,sign,0);
	else if (dim == 2)
		step = (int3)(0,0,sign);
	*output = step;
}
/**Uncompress dimDir into its dimension and direction and build the matching step vector.
 * @param dimDir 0,1,2,3,4,5 = -x,+x,-y,+y,-z,+z
 * @param output The unit direction vector corresponding to the "dimDir" axis (written via dimDirToStep).
 */
void dimDirToInt3(int dimDir, int3 *output) {
	const int dim = dimDir / 2;
	//dimDirToStep() takes the direction as a 0/1 flag (0 = negative axis) and tests "dir == 0".
	//Passing -1 for the negative case, as was done before, is non-zero and therefore always produced a
	//POSITIVE step, so -x/-y/-z could never be generated. Pass the flag in the encoding it expects.
	const int dir = (dimDir % 2 == 0) ? 0 : 1;
	dimDirToStep(dim,dir,output);
}

