Simple functions speed up #563

Open
wants to merge 32 commits into master
Changes from 25 commits
Commits
32 commits
d42a688
Build Framework (NeoML-master 1.0.45.0): Incrementing version number.
NeoML-maintainer Jan 14, 2021
609aa76
Merge remote-tracking branch 'upstream/master'
yekatkov Jan 19, 2021
5c1519b
Merge remote-tracking branch 'upstream/master'
yekatkov Feb 15, 2021
d7d2e6c
Merge remote-tracking branch 'upstream/master'
yekatkov Feb 19, 2021
b509b4d
Merge branch 'master' of https://github.com/yekatkov/neoml
yekatkov Jun 7, 2021
c6382a4
Merge remote-tracking branch 'upstream/master'
yekatkov Jun 15, 2021
1ce6387
Merge remote-tracking branch 'upstream/master'
yekatkov Jun 21, 2021
613df30
Build Framework (NeoML-master 1.0.45.0): Incrementing version number.
NeoML-maintainer Jan 14, 2021
87cee43
Merge branch 'master' of https://github.com/yekatkov/neoml
yekatkov Jun 25, 2021
fa7aace
Merge branch 'master' of https://github.com/yekatkov/neoml
yekatkov Jul 15, 2021
b7d2e16
Merge branch 'master' of https://github.com/yekatkov/neoml
yekatkov Aug 6, 2021
7f7b3ee
Merge branch 'master' of https://github.com/yekatkov/neoml
yekatkov Aug 25, 2021
9f24e3e
Merge branch 'master' of https://github.com/yekatkov/neoml
yekatkov Oct 13, 2021
eb64373
Merge branch 'master' of https://github.com/yekatkov/neoml
yekatkov Oct 27, 2021
f098e0c
Merge remote-tracking branch 'upstream/master'
yekatkov Oct 27, 2021
bc0797d
Merge remote-tracking branch 'upstream/master'
yekatkov Nov 15, 2021
a34183f
Merge remote-tracking branch 'upstream/master'
yekatkov Feb 8, 2022
b0257d0
Implement some fimple functions in JIT.
yekatkov Feb 3, 2022
fe4f83d
Some function were replaced by analogue from cstdlib.
yekatkov Feb 9, 2022
de05811
Fix vectorFill using
yekatkov Feb 9, 2022
00866c2
Add dummy definitions for simple math functions.
yekatkov Feb 10, 2022
af6320d
Implemented another primitives.
yekatkov Feb 10, 2022
989c31c
Fix farsing floating point argument in linux.
yekatkov Feb 11, 2022
a042084
Fix indexing of GPR for Linux
yekatkov Feb 11, 2022
4f3e62b
Merge remote-tracking branch 'upstream/master' into SimpleFunctionsSp…
yekatkov Feb 11, 2022
ede7bb7
Fix macOs build and PR comment
yekatkov Feb 13, 2022
d46b17b
Fix macOs build.
yekatkov Feb 14, 2022
18b89ef
Fixed macOs build
yekatkov Feb 14, 2022
8120266
Fixed some PR bugs.
yekatkov Feb 15, 2022
c3c66d2
Replace 'lea' instruction with 'add'.
yekatkov Feb 15, 2022
3514989
Merge remote-tracking branch 'upstream/master' into SimpleFunctionsSp…
yekatkov Feb 15, 2022
dd0fe1d
Merge branch 'master' into SimpleFunctionsSpeedUp
Mar 2, 2022
27 changes: 27 additions & 0 deletions NeoMathEngine/include/NeoMathEngine/SimdMathEngine.h
@@ -48,6 +48,33 @@ class ISimdMathEngine : public CCrtAllocatedObject {
virtual void Exp( float* dst, const float* src, size_t dataSize, bool isMultithread = true ) = 0;
virtual void RunOnceRestOfLstm( CMathEngineLstmDesc* desc, const CConstFloatHandle& inputStateBackLink,
const CFloatHandle& outputStateBackLink, const CFloatHandle& outputMainBackLink, bool isMultithread = true ) = 0;

using vectorAddFunc = void (*)( const float* first, const float* second, float* result, int vectorSize );
using alignedVectorAdd = void (*)( const float* first, float* second, int vectorSize );
using vectorEltwiseMax = void (*)( const float* first, const float* second, float* result, int vectorSize );
using vectorReLU = void (*)( const float* first, float* result, int vectorSize );
using vectorReLUTreshold = void (*)( const float* first, float* result, int vectorSize, float threshold );
using alignedVectorMultiplyAndAdd = void (*)( const float* first, const float* second,
float* result, int vectorSize, const float* mult );
using vectorMultiply = void (*)( const float* first, float multiplier, float* result, int vectorSize );
using vectorEltwiseMultiply = void (*)( const float* first, const float* second, float* result, int vectorSize );
using vectorEltwiseMultiplyAdd = void (*)( const float* first, const float* second, float* result, int vectorSize );
using vectorAddValue = void (*)( const float* first, float value, float* result, int vectorSize );
using vectorDotProduct = void (*)( const float* first, const float* second, float* result, int vectorSize );
using vectorMinMax = void (*)( const float* first, float* result, int vectorSize, const float minValue, const float maxValue );

virtual vectorAddFunc GetVectorAddFunc() = 0;
virtual alignedVectorAdd GetAlignedVectorAddFunc() = 0;
virtual vectorEltwiseMax GetVectorMaxFunc() = 0;
virtual vectorReLU GetVectorReLUFunc() = 0;
virtual vectorReLUTreshold GetVectorReLUTresholdFunc() = 0;
virtual alignedVectorMultiplyAndAdd GetAlignedVectorMultiplyAndAddFunc() = 0;
virtual vectorMultiply GetVectorMultiplyFunc() = 0;
virtual vectorEltwiseMultiply GetVectorEltwiseMultiplyFunc() = 0;
virtual vectorEltwiseMultiplyAdd GetVectorEltwiseMultiplyAddFunc() = 0;
virtual vectorAddValue GetVectorAddValueFunc() = 0;
virtual vectorDotProduct GetVectorDotProductFunc() = 0;
virtual vectorMinMax GetVectorMinMaxFunc() = 0;
};

}
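The getters above hand out the SIMD kernels as plain function pointers, so a caller can fetch them once and invoke them directly in hot loops instead of paying a virtual call per invocation. A minimal usage sketch (illustrative only, not part of the PR; the buffers and sizes are arbitrary, and 'simd' stands for any concrete ISimdMathEngine implementation):

#include <vector>

void addWithCachedKernel( ISimdMathEngine& simd )
{
	// Fetch the pointer once; subsequent calls bypass virtual dispatch.
	// The signature matches ISimdMathEngine::vectorAddFunc declared above.
	void ( *add )( const float*, const float*, float*, int ) = simd.GetVectorAddFunc();

	std::vector<float> a( 1024, 1.f ), b( 1024, 2.f ), sum( 1024 );
	add( a.data(), b.data(), sum.data(), static_cast<int>( a.size() ) ); // sum[i] = a[i] + b[i]
}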
28 changes: 28 additions & 0 deletions NeoMathEngine/src/CPU/CpuMathEngine.cpp
@@ -24,6 +24,7 @@ limitations under the License.
#include <NeoMathEngine/SimdMathEngine.h>
#include <DllLoader.h>
#include <CPUInfo.h>
#include <CpuMathEnginePrivate.h>

#if FINE_PLATFORM( FINE_ANDROID ) || FINE_PLATFORM( FINE_LINUX )
#include <PerformanceCountersCpuLinux.h>
@@ -78,6 +79,33 @@ CCpuMathEngine::CCpuMathEngine( int _threadCount, size_t _memoryLimit ) :
#ifdef NEOML_USE_MKL
vmlSetMode( VML_ERRMODE_NOERR );
#endif
if( simdMathEngine != nullptr ) {
vectorAdd = simdMathEngine->GetVectorAddFunc();
alignedVectorAdd = simdMathEngine->GetAlignedVectorAddFunc();
vectorEltwiseMax = simdMathEngine->GetVectorMaxFunc();
vectorReLU = simdMathEngine->GetVectorReLUFunc();
vectorReLUTreshold = simdMathEngine->GetVectorReLUTresholdFunc();
alignedVectorMultiplyAndAdd = simdMathEngine->GetAlignedVectorMultiplyAndAddFunc();
vectorMultiply = simdMathEngine->GetVectorMultiplyFunc();
vectorEltwiseMultiply = simdMathEngine->GetVectorEltwiseMultiplyFunc();
vectorEltwiseMultiplyAdd = simdMathEngine->GetVectorEltwiseMultiplyAddFunc();
vectorAddValue = simdMathEngine->GetVectorAddValueFunc();
vectorDotProduct = simdMathEngine->GetVectorDotProductFunc();
vectorMinMax = simdMathEngine->GetVectorMinMaxFunc();
} else {
vectorAdd = &NeoML::vectorAdd;
alignedVectorAdd = &NeoML::alignedVectorAdd;
vectorEltwiseMax = &NeoML::vectorEltwiseMax;
vectorReLU = &NeoML::vectorReLU;
vectorReLUTreshold = &NeoML::vectorReLUTreshold;
alignedVectorMultiplyAndAdd = &NeoML::alignedVectorMultiplyAndAdd;
vectorMultiply = &NeoML::vectorMultiply;
vectorEltwiseMultiply = &NeoML::vectorEltwiseMultiply;
vectorEltwiseMultiplyAdd = &NeoML::vectorEltwiseMultiplyAdd;
vectorAddValue = &NeoML::vectorAddValue;
vectorDotProduct = &NeoML::vectorDotProduct;
vectorMinMax = &NeoML::vectorMinMax;
}
}

CCpuMathEngine::~CCpuMathEngine()
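The constructor now resolves every vector primitive exactly once: kernels come from the SIMD backend when one is loaded, otherwise from the portable NeoML:: implementations, and all later call sites go through the cached member pointers. A stripped-down, self-contained sketch of this select-once, dispatch-many pattern (names are illustrative, not NeoML's):

static void scalarAdd( const float* a, const float* b, float* r, int n )
{
	for( int i = 0; i < n; ++i ) {
		r[i] = a[i] + b[i];
	}
}

class CVectorDispatch {
public:
	using TAddFunc = void (*)( const float*, const float*, float*, int );

	// Pick the fastest available implementation once, at construction time.
	explicit CVectorDispatch( TAddFunc simdAdd ) :
		add( simdAdd != nullptr ? simdAdd : &scalarAdd )
	{
	}

	void Add( const float* a, const float* b, float* r, int n ) const { add( a, b, r, n ); }

private:
	TAddFunc add;
};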
14 changes: 14 additions & 0 deletions NeoMathEngine/src/CPU/CpuMathEngine.h
@@ -558,6 +558,20 @@ class CCpuMathEngine : public IMathEngine, public IRawMemoryManager {
std::unique_ptr<ISimdMathEngine> simdMathEngine; // interface for using simd instructions
SgemmFunc customSgemmFunction; // Used when it is available and is faster than the default sgemm

void ( *vectorAdd )( const float* first, const float* second, float* result, int vectorSize );
void ( *alignedVectorAdd )( const float* first, float* second, int vectorSize );
void ( *vectorEltwiseMax )( const float* first, const float* second, float* result, int vectorSize );
void ( *vectorReLU )( const float* first, float* result, int vectorSize );
void ( *vectorReLUTreshold )( const float* first, float* result, int vectorSize, float threshold );
void ( *alignedVectorMultiplyAndAdd )( const float* first, const float* second,
float* result, int vectorSize, const float* mult );
void ( *vectorMultiply )( const float* first, float multiplier, float* result, int vectorSize );
void ( *vectorEltwiseMultiply )( const float* first, const float* second, float* result, int vectorSize );
void ( *vectorEltwiseMultiplyAdd )( const float* first, const float* second, float* result, int vectorSize );
void ( *vectorAddValue )( const float* first, float value, float* result, int vectorSize );
void ( *vectorDotProduct )( const float* first, const float* second, float* result, int vectorSize );
void ( *vectorMinMax )( const float* first, float* result, int vectorSize, const float minValue, const float maxValue );

IMathEngine& mathEngine() { IMathEngine* engine = this; return *engine; }

void blob3dConvolution1x1x1( const CBlobDesc& source, const CBlobDesc& filter, const CBlobDesc& result,
6 changes: 3 additions & 3 deletions NeoMathEngine/src/CPU/CpuMathEngineBlas.cpp
@@ -219,7 +219,7 @@ void CCpuMathEngine::AddVectorToMatrixColumns(const CConstFloatHandle& matrixHan
const float* vector = GetRaw( vectorHandle );

for(int i = 0; i < matrixHeight; ++i) {
vectorAddValue(matrix, result, matrixWidth, *vector);
vectorAddValue(matrix, *vector, result, matrixWidth);
matrix += matrixWidth;
result += matrixWidth;
++vector;
@@ -276,7 +276,7 @@ void CCpuMathEngine::RowMultiplyMatrixByMatrix(const CConstFloatHandle& firstHan
float* result = GetRaw( resultHandle );

for(int i = 0; i < height; ++i) {
vectorDotProduct(first, second, width, result);
vectorDotProduct(first, second, result, width);
first += width;
second += width;
++result;
@@ -819,7 +819,7 @@ void CCpuMathEngine::MultiplyDiagMatrixByMatrix( const CConstFloatHandle& firstH
NEOML_OMP_FOR_NUM_THREADS( curThreadCount )
for( int i = 0; i < firstSize; i++ ) {
const float multiplier = *( first + i );
vectorMultiply( second + i * secondWidth, result + i * secondWidth, multiplier, secondWidth );
vectorMultiply( second + i * secondWidth, multiplier, result + i * secondWidth, secondWidth );
}
}

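The call sites above are reordered to match the signatures now declared in ISimdMathEngine (for example, vectorAddValue takes the scalar addend before the result pointer, and vectorDotProduct takes the result before vectorSize). A plain scalar reference for vectorAddValue with the new ordering, assuming the straightforward semantics result[i] = first[i] + value:

// Scalar reference for the reordered vectorAddValue; semantics inferred from the call sites above.
inline void vectorAddValueRef( const float* first, float value, float* result, int vectorSize )
{
	for( int i = 0; i < vectorSize; ++i ) {
		result[i] = first[i] + value;
	}
}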
6 changes: 3 additions & 3 deletions NeoMathEngine/src/CPU/CpuMathEngineDnn3dConv.cpp
@@ -98,7 +98,7 @@ void CCpuMathEngine::blob3dConvolution1x1x1Backward( const CCommon3dConvolutionD
for( int i = 0; i < resultBlob.Width(); ++i ) {
float* inputDiffPixel = inputDiffCol;
for( int k = 0; k < resultBlob.Depth(); ++k ) {
NeoML::vectorAdd( inputDiffPixel, resultData, inputDiffPixel, inputDiff.Channels() );
vectorAdd( inputDiffPixel, resultData, inputDiffPixel, inputDiff.Channels() );
inputDiffPixel += inputDiff.Channels() * desc.StrideDepth;
resultData += inputDiff.Channels();
}
@@ -437,8 +437,8 @@ void CCpuMathEngine::blob3dConvolutionBackward( const CCommon3dConvolutionDesc&
int outputLineCount;
if( OmpGetTaskIndexAndCount( outputLineY, outputLineStart, outputLineCount ) ) {
if( freeTermData == 0 ) {
vectorFill( resultData + outputLineStart * outputRowSize,
0, outputLineCount * outputRowSize );
vectorFill0( resultData + outputLineStart * outputRowSize,
outputLineCount * outputRowSize );
}

int outputLineEnd = outputLineStart + outputLineCount;
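vectorFill0 replaces the vectorFill( ptr, 0, size ) calls with a dedicated zero-fill primitive. Assuming it simply zeroes the destination, a scalar equivalent would be:

// Assumed scalar equivalent of vectorFill0: zero 'vectorSize' floats starting at 'result'.
inline void vectorFill0Ref( float* result, int vectorSize )
{
	for( int i = 0; i < vectorSize; ++i ) {
		result[i] = 0.f;
	}
}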
14 changes: 7 additions & 7 deletions NeoMathEngine/src/CPU/CpuMathEngineDnnChannelwiseConv.cpp
@@ -166,7 +166,7 @@ void CCpuMathEngine::blobChannelwiseConvolutionFilter3x3Padding1Stride2( const C
if( freeTerm != 0 ) {
fillResultRow( desc, freeTerm, resultFirstRow );
} else {
NeoML::vectorFill( resultFirstRow, 0, resultDesc.Width() * channels );
NeoML::vectorFill0( resultFirstRow, resultDesc.Width() * channels );
}

processFilterRowStride2( desc, filter + filterRowSize, sourceFirstRow, resultFirstRow );
@@ -181,7 +181,7 @@ void CCpuMathEngine::blobChannelwiseConvolutionFilter3x3Padding1Stride2( const C
if( freeTerm != 0 ) {
fillResultRow( desc, freeTerm, resRow );
} else {
NeoML::vectorFill( resRow, 0, resultDesc.Width() * channels );
NeoML::vectorFill0( resRow, resultDesc.Width() * channels );
}

processFilterRowStride2( desc, filter, srcRow, resRow );
@@ -193,7 +193,7 @@ void CCpuMathEngine::blobChannelwiseConvolutionFilter3x3Padding1Stride2( const C
if( freeTerm != 0 ) {
fillResultRow( desc, freeTerm, resultLastRow );
} else {
NeoML::vectorFill( resultLastRow, 0, resultDesc.Width() * channels );
NeoML::vectorFill0( resultLastRow, resultDesc.Width() * channels );
}

processFilterRowStride2( desc, filter, sourceLastRow, resultLastRow );
@@ -263,7 +263,7 @@ void CCpuMathEngine::blobChannelwiseConvolutionFilter3x3Padding1Stride1( const C
if( freeTerm != 0 ) {
fillResultRow( desc, freeTerm, resultFirstRow );
} else {
NeoML::vectorFill( resultFirstRow, 0, resultDesc.Width() * channels );
NeoML::vectorFill0( resultFirstRow, resultDesc.Width() * channels );
}
processFilterRowStride1( desc, filter + filterRowSize, sourceFirstRow, resultFirstRow );
if( resultCount >= 0 ) {
Expand All @@ -277,7 +277,7 @@ void CCpuMathEngine::blobChannelwiseConvolutionFilter3x3Padding1Stride1( const C
if( freeTerm != 0 ) {
fillResultRow( desc, freeTerm, resRow );
} else {
NeoML::vectorFill( resRow, 0, resultDesc.Width() * channels );
NeoML::vectorFill0( resRow, resultDesc.Width() * channels );
}

processFilterRowStride1( desc, filter, srcRow, resRow );
Expand All @@ -289,7 +289,7 @@ void CCpuMathEngine::blobChannelwiseConvolutionFilter3x3Padding1Stride1( const C
if( freeTerm != 0 ) {
fillResultRow( desc, freeTerm, resultLastRow );
} else {
NeoML::vectorFill( resultLastRow, 0, resultDesc.Width() * channels );
NeoML::vectorFill0( resultLastRow, resultDesc.Width() * channels );
}

processFilterRowStride1( desc, filter, sourceLastRow, resultLastRow );
@@ -366,7 +366,7 @@ void CCpuMathEngine::BlobChannelwiseConvolution( const CChannelwiseConvolutionDe
NeoML::dataCopy(rowStart, freeTerm, channels);
}
} else {
NeoML::vectorFill(resultRow, 0, resultDesc.Width() * channels);
NeoML::vectorFill0(resultRow, resultDesc.Width() * channels);
}

const int filterFirstRow = max( 0, -firstFilteredRow );
8 changes: 4 additions & 4 deletions NeoMathEngine/src/CPU/CpuMathEngineDnnConv.cpp
@@ -355,7 +355,7 @@ void CCpuMathEngine::fillTempData( const float* sourceData, float* tempData, con
for( int h = 0; h < desc.Filter.Height(); h++ ) {
if( 0 <= sourceHeight + h * desc.DilationHeight && sourceHeight + h * desc.DilationHeight < desc.Source.Height() ) {
if( startPaddingSize > 0 ) {
NeoML::vectorFill( tempStartPaddingPtr, 0.0, startPaddingSize * channelsCount );
NeoML::vectorFill0( tempStartPaddingPtr, startPaddingSize * channelsCount );
}

if( desc.DilationWidth == 1 ) {
@@ -369,10 +369,10 @@ void CCpuMathEngine::fillTempData( const float* sourceData, float* tempData, con
}

if( endPaddingSize > 0 ) {
NeoML::vectorFill( tempEndPaddingPtr, 0.0, endPaddingSize * channelsCount );
NeoML::vectorFill0( tempEndPaddingPtr, endPaddingSize * channelsCount );
}
} else {
NeoML::vectorFill( tempStartPaddingPtr, 0.0, filterLineSize );
NeoML::vectorFill0( tempStartPaddingPtr, filterLineSize );
}

tempStartPaddingPtr += filterLineSize;
@@ -577,7 +577,7 @@ void CCpuMathEngine::backwardConvolutionAddFilterToOutput( const CCpuConvolution
// Set the free term
setVectorToMatrixRows( outputDataPtr, output.Width(), output.Depth() * output.Channels(), freeTermDataRaw );
} else {
vectorFill( outputDataPtr, 0, output.Width() * output.Depth() * output.Channels() );
vectorFill0( outputDataPtr, output.Width() * output.Depth() * output.Channels() );
}

int batch = step / output.Height();
6 changes: 3 additions & 3 deletions NeoMathEngine/src/CPU/CpuMathEngineDnnRleConv.cpp
@@ -115,7 +115,7 @@ static inline void updateFilterConv( IMathEngine& mathEngine, CCpuRleConvolution
const float* convertFilterDataPtr = GetRaw( desc.ConvertedFilter.GetHandle() );
for( int j = 0; j < filterHeight; ++j ) {
for( int i = 0; i < filterWidth; ++i ) {
alignedVectorAdd( zeroFilterConvPtr, convertFilterDataPtr, filterCount );
alignedVectorAdd( convertFilterDataPtr, zeroFilterConvPtr, filterCount );
convertFilterDataPtr += filterCount;
}
zeroFilterConvPtr += filterCount;
@@ -268,7 +268,7 @@ void CCpuMathEngine::BlobRleConvolution( const CRleConvolutionDesc& convDesc, co
const float* curFilterConvData = filterConvData + index * filterConvStep;
float* curOutput = output;
for( int j = 0; j < jCount; ++j ) {
alignedVectorAdd( curOutput, curFilterConvData, filterCount );
alignedVectorAdd( curFilterConvData, curOutput, filterCount );
curFilterConvData += strideHeight * filterCount;
curOutput += outputRowSize;
}
@@ -382,7 +382,7 @@ void CCpuMathEngine::BlobRleConvolutionLearnAdd( const CRleConvolutionDesc& conv
// Calculate diff separately for the free terms
for( int j = 0; j < outputDiff.Height(); ++j ) {
for( int k = 0; k < outputDiff.Width(); ++k ) {
alignedVectorAdd( freeTermDiffReductionPrivatePtr, outputDiffDataPtr, filterCount );
alignedVectorAdd( outputDiffDataPtr, freeTermDiffReductionPrivatePtr, filterCount );
outputDiffDataPtr += filterCount;
}
}
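With the signature declared in ISimdMathEngine ( const float* first, float* second, int vectorSize ), the destination of alignedVectorAdd is now the second argument, which is why these call sites swap their operands. A scalar reading of that contract, inferred from the swapped calls rather than stated in the PR:

// Inferred contract of the new alignedVectorAdd signature: accumulate 'first' into 'second'.
inline void alignedVectorAddRef( const float* first, float* second, int vectorSize )
{
	for( int i = 0; i < vectorSize; ++i ) {
		second[i] += first[i];
	}
}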
16 changes: 8 additions & 8 deletions NeoMathEngine/src/CPU/CpuMathEngineVectorMath.cpp
@@ -128,11 +128,11 @@ void CCpuMathEngine::VectorAdd(const CConstFloatHandle& firstHandle, const CCons
NEOML_OMP_NUM_THREADS( curThreadCount ) {
int index, count;
if( OmpGetTaskIndexAndCount( vectorSize, 16, index, count ) ) {
NeoML::vectorAdd( GetRaw(firstHandle + index), GetRaw(secondHandle + index), GetRaw(resultHandle + index), count );
vectorAdd( GetRaw(firstHandle + index), GetRaw(secondHandle + index), GetRaw(resultHandle + index), count );
}
}
} else {
NeoML::vectorAdd( GetRaw(firstHandle), GetRaw(secondHandle), GetRaw(resultHandle), vectorSize );
vectorAdd( GetRaw(firstHandle), GetRaw(secondHandle), GetRaw(resultHandle), vectorSize );
}
}

@@ -352,7 +352,7 @@ void CCpuMathEngine::VectorAddValue(const CConstFloatHandle& firstHandle, const
float* result = GetRaw( resultHandle );
float value = *GetRaw( addition );

vectorAddValue( first, result, vectorSize, value );
vectorAddValue( first, value, result, vectorSize );
}

void CCpuMathEngine::VectorDotProduct(const CConstFloatHandle& firstHandle, const CConstFloatHandle& secondHandle,
@@ -367,7 +367,7 @@ void CCpuMathEngine::VectorDotProduct(const CConstFloatHandle& firstHandle, cons
const float* second = GetRaw( secondHandle );
float* result = GetRaw( resultHandle );

vectorDotProduct( first, second, vectorSize, result );
vectorDotProduct( first, second, result, vectorSize );
}

void CCpuMathEngine::VectorTopK(const CConstFloatHandle& firstHandle, int firstSize, int k, const CFloatHandle& resultHandle,
Expand Down Expand Up @@ -470,11 +470,11 @@ void CCpuMathEngine::VectorMultiply(const CConstFloatHandle& firstHandle,
NEOML_OMP_NUM_THREADS( curThreadCount ) {
int index, count;
if( OmpGetTaskIndexAndCount( vectorSize, 16, index, count ) ) {
vectorMultiply( GetRaw( firstHandle + index ), GetRaw( resultHandle + index ), multiplier, count );
vectorMultiply( GetRaw( firstHandle + index ), multiplier, GetRaw( resultHandle + index ), count );
}
}
} else {
vectorMultiply( GetRaw( firstHandle ), GetRaw( resultHandle ), multiplier, vectorSize );
vectorMultiply( GetRaw( firstHandle ), multiplier, GetRaw( resultHandle ), vectorSize );
}
}

@@ -656,11 +656,11 @@ void CCpuMathEngine::VectorMinMax(const CConstFloatHandle& firstHandle, const CF
NEOML_OMP_NUM_THREADS( curThreadCount ) {
int index, count;
if( OmpGetTaskIndexAndCount( vectorSize, 16, index, count ) ) {
vectorMinMax( GetRaw(firstHandle + index), GetRaw(resultHandle + index), minValue, maxValue, count );
vectorMinMax( GetRaw(firstHandle + index), GetRaw(resultHandle + index), count, minValue, maxValue );
}
}
} else {
vectorMinMax( GetRaw(firstHandle ), GetRaw(resultHandle ), minValue, maxValue, vectorSize );
vectorMinMax( GetRaw(firstHandle ), GetRaw(resultHandle ), vectorSize, minValue, maxValue );
}
}

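Each wrapper in this file now calls the cached member pointer ( vectorAdd, vectorMultiply, vectorMinMax, ... ) on both the single-threaded path and the OpenMP-partitioned path. A generic sketch of splitting a vector across OpenMP threads and running a cached kernel on each chunk, written without NeoML's OmpGetTaskIndexAndCount helper and requiring only an OpenMP-enabled build:

// Generic chunked dispatch sketch (not NeoML's actual helper): each thread processes its slice.
#include <omp.h>

using TKernelFunc = void (*)( const float* first, const float* second, float* result, int size );

void parallelApply( TKernelFunc kernel, const float* a, const float* b, float* r, int n )
{
	#pragma omp parallel
	{
		const int threads = omp_get_num_threads();
		const int id = omp_get_thread_num();
		const int chunk = ( n + threads - 1 ) / threads;
		const int start = id * chunk;
		const int count = start < n ? ( start + chunk <= n ? chunk : n - start ) : 0;
		if( count > 0 ) {
			kernel( a + start, b + start, r + start, count );
		}
	}
}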
8 changes: 4 additions & 4 deletions NeoMathEngine/src/CPU/arm/CpuArmMathEngineVectorMathPrivate.h
@@ -272,7 +272,7 @@ inline void alignedVectorMultiplyAndAdd( const float* first, const float* second

//------------------------------------------------------------------------------------------------------------

inline void vectorMultiply( const float* first, float* result, float multiplier, int vectorSize )
inline void vectorMultiply( const float* first, float multiplier, float* result, int vectorSize )
{
int count = GetCount4(vectorSize);
float32x4_t mult = vdupq_n_f32(multiplier);
@@ -504,7 +504,7 @@ inline void vectorReLU( const float* first, float* result, int vectorSize, float

//------------------------------------------------------------------------------------------------------------

inline void vectorAddValue( const float* first, float* result, int vectorSize, float value )
inline void vectorAddValue( const float* first, float value, float* result, int vectorSize )
{
float32x4_t addition = vdupq_n_f32(value);

@@ -526,7 +526,7 @@ inline void vectorAddValue( const float* first, float* result, int vectorSize, f

//------------------------------------------------------------------------------------------------------------

inline void vectorDotProduct( const float* first, const float* second, int vectorSize, float* result )
inline void vectorDotProduct( const float* first, const float* second, float* result, int vectorSize )
{
float32x4_t acc = vdupq_n_f32(0);

@@ -709,7 +709,7 @@ static inline void qrnnIfPoolingStep( const float* z, const float* f, const floa
}
}

inline void vectorMinMax( const float* first, float* result, const float minValue, const float maxValue, int vectorSize )
inline void vectorMinMax( const float* first, float* result, int vectorSize, const float minValue, const float maxValue )
{
int count = GetCount4(vectorSize);

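The NEON primitives are reordered to match the function-pointer types declared in ISimdMathEngine. For reference, a scalar equivalent of the reordered vectorMinMax, which clamps every element to [minValue, maxValue]:

// Scalar equivalent of the reordered vectorMinMax: clamp each element to [minValue, maxValue].
#include <algorithm>

inline void vectorMinMaxRef( const float* first, float* result, int vectorSize,
	const float minValue, const float maxValue )
{
	for( int i = 0; i < vectorSize; ++i ) {
		result[i] = std::min( std::max( first[i], minValue ), maxValue );
	}
}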