/* Some notes about OpenCL.
 * Many GPUs (Quadro included) use little-endian byte ordering, whereas Java uses big-endian by default. Luckily, the
 * JogAmp library already converts the CLBuffers whenever necessary; however, sending custom raw byte data requires
 * paying attention to this.
 */

// Editor-only stubs. __OPENCL_VERSION__ is predefined by OpenCL C compilers, so this block is skipped
// during an actual OpenCL compile. It is used only to trick Eclipse into syntax highlighting this file
// as "C" code: the OpenCL qualifiers are defined away to nothing and the OpenCL vector type names are
// aliased to scalar C types so that a plain C parser accepts them.
#ifndef __OPENCL_VERSION__
// Function and address-space qualifiers: expand to nothing.
#define __kernel
#define __global
#define kernel
#define global
#define constant
#define local
// Vector type names: mapped to a scalar type of the same family (for the editor's parser only).
#define float2 float
#define int3 int
#define float3 float
#define float4 float
#define uchar4 char
#endif

/*
 * Test kernel: every work-item hands one 3x3 linear system to solve() and stores the solution vector.
 *
 * As             - input, 9 consecutive floats per task (element layout is whatever solve() expects)
 * Bs             - input, 3 consecutive floats per task
 * clResultBuffer - output, 3 consecutive floats per task: the contents of x after solve() returns
 * numTasks       - number of tasks; work-items whose global id is >= numTasks write nothing
 */
kernel void test_solveLinearSystem_dimension3(global const float *As, global const float *Bs, global float *clResultBuffer, const int numTasks)
{
	// One task per work-item; the guard plays the role of the loop limit in serial C code.
	const int task = get_global_id(0);
	if (!(task < numTasks))
		return;

	// Offsets of this task's slice inside the packed buffers.
	// Bs and clResultBuffer share the same 3-floats-per-task layout, hence the same offset.
	const int matrixBase = task * 9;
	const int vectorBase = task * 3;

	// Copy this task's inputs into function-local arrays that are passed to solve().
	float matrix[9];
	for (int i = 0; i < 9; ++i)
		matrix[i] = As[matrixBase + i];

	float rhs[3];
	for (int i = 0; i < 3; ++i)
		rhs[i] = Bs[vectorBase + i];

	// Only solution[0] is set to `infinity`; the remaining elements are zero-initialized
	// by the aggregate initializer.
	float solution[3] = { infinity };

	// The return value of solve() is captured but not used by this test kernel;
	// whatever is in `solution` afterwards is written out unconditionally.
	bool solveResult = solve(matrix, rhs, solution, 3);

	for (int i = 0; i < 3; ++i)
		clResultBuffer[vectorBase + i] = solution[i];
}
/*
 * Test kernel: every work-item hands one 4x4 linear system to solve() and stores the solution vector.
 *
 * As             - input, 16 consecutive floats per task (element layout is whatever solve() expects)
 * Bs             - input, 4 consecutive floats per task
 * clResultBuffer - output, 4 consecutive floats per task: the contents of x after solve() returns
 * numTasks       - number of tasks; work-items whose global id is >= numTasks write nothing
 */
kernel void test_solveLinearSystem_dimension4(global const float *As, global const float *Bs, global float *clResultBuffer, const int numTasks)
{
	// One task per work-item; the guard plays the role of the loop limit in serial C code.
	const int task = get_global_id(0);
	if (!(task < numTasks))
		return;

	// Offsets of this task's slice inside the packed buffers.
	// Bs and clResultBuffer share the same 4-floats-per-task layout, hence the same offset.
	const int matrixBase = task * 16;
	const int vectorBase = task * 4;

	// Copy this task's inputs into function-local arrays that are passed to solve().
	float matrix[16];
	for (int i = 0; i < 16; ++i)
		matrix[i] = As[matrixBase + i];

	float rhs[4];
	for (int i = 0; i < 4; ++i)
		rhs[i] = Bs[vectorBase + i];

	// Only solution[0] is set to `infinity`; the remaining elements are zero-initialized
	// by the aggregate initializer.
	float solution[4] = { infinity };

	// The return value of solve() is captured but not used by this test kernel;
	// whatever is in `solution` afterwards is written out unconditionally.
	bool solveResult = solve(matrix, rhs, solution, 4);

	for (int i = 0; i < 4; ++i)
		clResultBuffer[vectorBase + i] = solution[i];
}
