Don't waste too much time evaluating the "score" for the "native CPU"
This code typically took tens of seconds, which is utterly pointless. Instead, run the loop for at most about one second, and then estimate how long it would have taken to process all of the data. Don't confuse the "native CPU" term here with a CPU-based device of an actual OpenCL implementation; that is a completely different thing. The "native CPU" here is just our way of estimating how much time it takes to do the calculations using normal C++ code on the CPU. Change-Id: I92f5eedc06bbaaef6a9b5322fefec9d41f0db505 Reviewed-on: https://gerrit.libreoffice.org/26774 Reviewed-by: Michael Meeks <michael.meeks@collabora.com> Tested-by: Jenkins <ci@libreoffice.org>
This commit is contained in:
@@ -327,7 +327,8 @@ ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOffice
|
|||||||
timer kernelTime;
|
timer kernelTime;
|
||||||
timerStart(&kernelTime);
|
timerStart(&kernelTime);
|
||||||
|
|
||||||
for (unsigned long j = 0; j < testData->outputSize; j++)
|
unsigned long j;
|
||||||
|
for (j = 0; j < testData->outputSize; j++)
|
||||||
{
|
{
|
||||||
double fAverage = 0.0f;
|
double fAverage = 0.0f;
|
||||||
double fMin = DBL_MAX;
|
double fMin = DBL_MAX;
|
||||||
@@ -340,15 +341,25 @@ ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOffice
|
|||||||
}
|
}
|
||||||
fAverage /= testData->inputSize;
|
fAverage /= testData->inputSize;
|
||||||
testData->output[j] = fAverage + (fMin * fSoP);
|
testData->output[j] = fAverage + (fMin * fSoP);
|
||||||
|
// Don't run for much longer than one second
|
||||||
|
if (j > 0 && j % 100 == 0)
|
||||||
|
{
|
||||||
|
rDevice.fTime = timerCurrent(&kernelTime);
|
||||||
|
if (rDevice.fTime >= 1)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rDevice.fTime = timerCurrent(&kernelTime);
|
||||||
|
|
||||||
|
// Scale time to how long it would have taken to go all the way to outputSize
|
||||||
|
rDevice.fTime /= ((double) j / testData->outputSize);
|
||||||
|
|
||||||
// InterpretTail - the S/W fallback is nothing like as efficient
|
// InterpretTail - the S/W fallback is nothing like as efficient
|
||||||
// as any good openCL implementation: no SIMD, tons of branching
|
// as any good openCL implementation: no SIMD, tons of branching
|
||||||
// in the inner loops etc. Generously characterise it as only 10x
|
// in the inner loops etc. Generously characterise it as only 10x
|
||||||
// slower than the above.
|
// slower than the above.
|
||||||
float fInterpretTailFactor = 10.0;
|
float fInterpretTailFactor = 10.0;
|
||||||
|
|
||||||
rDevice.fTime = timerCurrent(&kernelTime);
|
|
||||||
rDevice.fTime *= fInterpretTailFactor;
|
rDevice.fTime *= fInterpretTailFactor;
|
||||||
rDevice.bErrors = false;
|
rDevice.bErrors = false;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user