/*
 * Test kernel: for each input point, writes one record into 'results' made of
 * the point's four components followed by 'localTableSize' values that were
 * staged through the work-group's __local scratch table.
 *
 * Record layout (record size = 4 + localTableSize floats, record index = iGID):
 *   [0..3]      x, y, z, w of listPoints[iGID]
 *   [4 + i]     (float)(i * iGID)   for i in [0, localTableSize)
 *
 * listPoints      input points; element iGID is read when iGID < numPoints
 * results         output buffer; the caller must provide room for
 *                 numPoints * (4 + localTableSize) floats
 * numPoints       number of valid points; work-items at or beyond it write nothing
 * localTable      __local scratch buffer with room for localTableSize floats
 * localTableSize  number of floats in localTable
 *
 * 'localTable' is shared by every work-item of a work-group, so the work-items
 * take turns using it (one per loop iteration, separated by barriers). Without
 * that, concurrent work-items would overwrite each other's table entries.
 * NOTE(review): turns are keyed on dimension 0 only, matching the use of
 * get_global_id(0) -- assumes a 1-D launch; confirm against the host code.
 */
kernel void test_UseLocalFloatTable(__global const float4* listPoints, __global float* results, const int numPoints, 
 						__local float* localTable, const int localTableSize) 
{
	// get index into global data array
	int iGID = get_global_id(0);

	// position of this work-item inside its work-group, and the group's size
	const int localId = (int)get_local_id(0);
	const int groupSize = (int)get_local_size(0);

	// bound check (equivalent to the limit on a 'for' loop in standard/serial C code).
	// Kept as a flag rather than an early return: every work-item of the group
	// must reach each barrier() below, including the out-of-range ones.
	const bool inRange = (iGID < numPoints);

	const int resultRecordSize = 4 + localTableSize;

	// Give each work-item exclusive use of the shared table, one turn at a time.
	for (int turn = 0; turn < groupSize; turn++)
	{
		if (inRange && localId == turn)
		{
			float4 lePoint = listPoints[iGID];

			for (int i = 0; i < localTableSize; i++)
			{
				localTable[i] = (float)(i*iGID);
			}

			results[iGID*resultRecordSize + 0] = lePoint.x;
			results[iGID*resultRecordSize + 1] = lePoint.y;
			results[iGID*resultRecordSize + 2] = lePoint.z;
			results[iGID*resultRecordSize + 3] = lePoint.w;
			for (int i = 0; i < localTableSize; i++)
			{
				results[iGID*resultRecordSize + 4 + i] = localTable[i];
			}
		}

		// The current owner must finish reading the table before the next
		// owner starts overwriting it.
		barrier(CLK_LOCAL_MEM_FENCE);
	}
}

/*
 * Test kernel: copies each float4 of 'listPoints' into 'results' as four
 * consecutive floats (x, y, z, w), so point k occupies results[4*k .. 4*k+3].
 *
 * listPoints  input points; element gid is read when gid < numPoints
 * results     output buffer; the caller must provide room for 4 * numPoints floats
 * numPoints   number of valid points; work-items at or beyond it do nothing
 */
kernel void test_WriteAsFlatTable(global const float4* listPoints, global float* results, const int numPoints) 
{
	// this work-item's index into the global data arrays
	const int gid = get_global_id(0);

	// only work-items that map onto an existing point do any work
	if (gid < numPoints)
	{
		// vstore4 writes the four components to results + gid*4, in x, y, z, w order
		vstore4(listPoints[gid], gid, results);
	}
}