From a133b8aae95e4dc139fb72c740ea4760ed5612ee Mon Sep 17 00:00:00 2001 From: Kirill Golikov Date: Thu, 25 Apr 2024 17:20:13 +0200 Subject: [PATCH 1/4] [NeoML] add to CBlobDesc empirically better size for a blob Signed-off-by: Kirill Golikov --- .../include/NeoMathEngine/BlobDesc.h | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/NeoMathEngine/include/NeoMathEngine/BlobDesc.h b/NeoMathEngine/include/NeoMathEngine/BlobDesc.h index adfc9a480..cc528f81b 100644 --- a/NeoMathEngine/include/NeoMathEngine/BlobDesc.h +++ b/NeoMathEngine/include/NeoMathEngine/BlobDesc.h @@ -59,6 +59,15 @@ class NEOMATHENGINE_API CBlobDesc final { explicit CBlobDesc( TBlobType dataType = CT_Invalid ); CBlobDesc( std::initializer_list list ); + CBlobDesc( CBlobDesc&& ) = default; + CBlobDesc( const CBlobDesc& ) = default; + + CBlobDesc& operator=( CBlobDesc&& ) = default; + CBlobDesc& operator=( const CBlobDesc& other ); + + bool operator==( const CBlobDesc& other ) const { return type == other.type && HasEqualDimensions( other ); } + bool operator!=( const CBlobDesc& other ) const { return !( *this == other ); } + // The maximum possible sequence length for a recurrent network int BatchLength() const { return dimensions[BD_BatchLength]; } // The number of sequences in the blob @@ -75,6 +84,8 @@ class NEOMATHENGINE_API CBlobDesc final { int Channels() const { return dimensions[BD_Channels]; } // The blob size, in elements int BlobSize() const; + // The empirically better size for this blob, in elements + int MemorySize() const { return memorySize; } // The size of one object in the blob int ObjectSize() const { return Height() * Width() * Depth() * Channels(); } // The number of objects in the blob @@ -85,8 +96,10 @@ class NEOMATHENGINE_API CBlobDesc final { // The size of the dimension with a given index int DimSize( int d ) const { return dimensions[d]; } // Sets the size of the dimension with a given index - void SetDimSize( int d, int size ) { dimensions[d] = size; } + void SetDimSize( int d, int size ); + // If memory size of original blob >= required, the dimensions could be reinterpreted + bool FitForReinterpretDimensions( const CBlobDesc& other ) const; // Checks if the described blob has the same dimensions bool HasEqualDimensions( const CBlobDesc& other ) const; bool HasEqualDimensions( const CBlobDesc& other, std::initializer_list dimensions ) const; @@ -99,12 +112,16 @@ class NEOMATHENGINE_API CBlobDesc final { private: int dimensions[MaxDimensions]{}; TBlobType type = CT_Invalid; + int memorySize = 0; // empirically better size for this blob, count in elements + + void setMemorySize( int blobSize ) { memorySize = ( memorySize > blobSize ) ? 
memorySize : blobSize; } }; //--------------------------------------------------------------------------------------------------------------------- inline CBlobDesc::CBlobDesc( TBlobType dataType ) : - type( dataType ) + type( dataType ), + memorySize( 1 ) { for( int i = 0; i < MaxDimensions; i++ ) { dimensions[i] = 1; @@ -112,7 +129,8 @@ inline CBlobDesc::CBlobDesc( TBlobType dataType ) : } inline CBlobDesc::CBlobDesc( std::initializer_list list ) : - type( CT_Float ) + type( CT_Float ), + memorySize( 0 ) { int i = BD_Count - 1; int j = static_cast( list.size() ) - 1; @@ -127,6 +145,19 @@ inline CBlobDesc::CBlobDesc( std::initializer_list list ) : dimensions[i] = 1; i--; } + setMemorySize( BlobSize() ); +} + +inline CBlobDesc& CBlobDesc::operator=( const CBlobDesc& other ) +{ + if( this != &other ) { + for( int i = 0; i < MaxDimensions; i++ ) { + dimensions[i] = other.dimensions[i]; + } + type = other.type; + setMemorySize( other.BlobSize() ); + } + return *this; } inline int CBlobDesc::BlobSize() const @@ -138,6 +169,17 @@ inline int CBlobDesc::BlobSize() const return blobSize; } +inline void CBlobDesc::SetDimSize( int d, int size ) +{ + dimensions[d] = size; + setMemorySize( BlobSize() ); +} + +inline bool CBlobDesc::FitForReinterpretDimensions( const CBlobDesc& other ) const +{ + return BlobSize() <= other.MemorySize(); +} + inline bool CBlobDesc::HasEqualDimensions( const CBlobDesc& other ) const { for( TBlobDim d = TBlobDim( 0 ); d < BD_Count; ++d ) { From d855ec542efe11599564f801d908e6f1a8405350 Mon Sep 17 00:00:00 2001 From: Kirill Golikov Date: Thu, 25 Apr 2024 17:16:19 +0200 Subject: [PATCH 2/4] [NeoML] in CDnnBlob use BlobDesc.MemorySize Signed-off-by: Kirill Golikov --- NeoML/include/NeoML/Dnn/DnnBlob.h | 2 +- NeoML/src/Dnn/DnnBlob.cpp | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/NeoML/include/NeoML/Dnn/DnnBlob.h b/NeoML/include/NeoML/Dnn/DnnBlob.h index f9c0b1138..419da4880 100644 --- a/NeoML/include/NeoML/Dnn/DnnBlob.h +++ b/NeoML/include/NeoML/Dnn/DnnBlob.h @@ -164,7 +164,7 @@ class NEOML_API CDnnBlob : public IObject { // Changes the blob dimensions "names" without moving the data // In effect, only the blob description is changed - // As the data is unaffected, the total blob size specified by the new descriptor should be the same + // As the data is unaffected, the total blob size specified by the new descriptor should be less or the same void ReinterpretDimensions( const CBlobDesc& newDesc ); // Merges blobs along the given dimension diff --git a/NeoML/src/Dnn/DnnBlob.cpp b/NeoML/src/Dnn/DnnBlob.cpp index ddb537872..871fb8aef 100644 --- a/NeoML/src/Dnn/DnnBlob.cpp +++ b/NeoML/src/Dnn/DnnBlob.cpp @@ -167,13 +167,14 @@ void CDnnBlob::initializeByPattern(TBlobType type, const CBlobDesc& pattern) { NeoAssert(desc.GetDataType() == CT_Invalid); - const int size = pattern.BlobSize(); + const int allocSize = pattern.MemorySize(); + NeoAssert( allocSize >= pattern.BlobSize() ); switch(type) { case CT_Float: - data = mathEngine.HeapAllocTyped( size ); + data = mathEngine.HeapAllocTyped( allocSize ); break; case CT_Int: - data = mathEngine.HeapAllocTyped( size ); + data = mathEngine.HeapAllocTyped( allocSize ); break; default: NeoAssert( false ); @@ -381,11 +382,11 @@ void CDnnBlob::TransposeFrom(const CDnnBlob* other, int _d1, int _d2) // Changes the blob dimensions "names" without moving the data // In effect, only the blob descriptor is changed -// As the data is unaffected, the total blob size specified by the new descriptor should be the same 
+// As the data is unaffected, the total blob size specified by the new descriptor should be less or the same void CDnnBlob::ReinterpretDimensions( const CBlobDesc& newDesc ) { NeoAssert( parent == 0 ); - NeoAssert( newDesc.BlobSize() == desc.BlobSize() ); + NeoAssert( newDesc.BlobSize() <= desc.MemorySize() ); desc = newDesc; } From fffcbaec22618cf197903730fc2152e2c9e2907f Mon Sep 17 00:00:00 2001 From: Kirill Golikov Date: Thu, 9 May 2024 20:39:41 +0200 Subject: [PATCH 3/4] [NeoMathEngine] GetReuseMemoryMode for less memory usage Signed-off-by: Kirill Golikov --- NeoML/src/Dnn/BaseLayer.cpp | 8 +++++--- NeoML/src/Dnn/Dnn.cpp | 4 ++-- NeoML/src/Dnn/DnnBlob.cpp | 2 +- NeoMathEngine/src/MemoryEngineMixin.cpp | 6 ++++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/NeoML/src/Dnn/BaseLayer.cpp b/NeoML/src/Dnn/BaseLayer.cpp index 133c3fc20..18065b3ec 100644 --- a/NeoML/src/Dnn/BaseLayer.cpp +++ b/NeoML/src/Dnn/BaseLayer.cpp @@ -234,13 +234,15 @@ bool CBaseLayer::InputsMayBeOverwritten() const // The class that switches memory reuse mode class CMemoryModeSwitcher { public: - explicit CMemoryModeSwitcher( IMathEngine& _mathEngine, bool _need ) : mathEngine( _mathEngine ), need( _need ) - { if( need ) { mathEngine.SetReuseMemoryMode( true ); } } + explicit CMemoryModeSwitcher( IMathEngine& _mathEngine, bool _need ) : + mathEngine( _mathEngine ), need( _need ), mode( mathEngine.GetReuseMemoryMode() ) + { if( need > mode ) { mathEngine.SetReuseMemoryMode( true ); } } ~CMemoryModeSwitcher() - { if( need ) { mathEngine.SetReuseMemoryMode( false ); } } + { if( need > mode ) { mathEngine.SetReuseMemoryMode( false ); } } public: IMathEngine& mathEngine; bool need; + bool mode; }; void CBaseLayer::AllocateOutputBlobs() diff --git a/NeoML/src/Dnn/Dnn.cpp b/NeoML/src/Dnn/Dnn.cpp index cbd1e1d5e..560088fec 100644 --- a/NeoML/src/Dnn/Dnn.cpp +++ b/NeoML/src/Dnn/Dnn.cpp @@ -416,7 +416,7 @@ CDnn::CDnn( CRandom& _random, IMathEngine& _mathEngine, const CCompositeLayer* o currentSequencePos( 0 ), isReverseSequense( false ), autoRestartMode( true ), - isReuseMemoryMode( false ) + isReuseMemoryMode( mathEngine.GetReuseMemoryMode() ) { solver = FINE_DEBUG_NEW CDnnSimpleGradientSolver( mathEngine ); initializer = FINE_DEBUG_NEW CDnnXavierInitializer( random ); @@ -648,7 +648,7 @@ void CDnn::RunOnce() reshape(); // rebuild the network if necessary // During inference we turning reuseMemoryMode on when the net is big enough - isReuseMemoryMode = ( getOutputBlobsSize() > MinReuseMemoryModeNetSize ); + isReuseMemoryMode = mathEngine.GetReuseMemoryMode() || ( getOutputBlobsSize() > MinReuseMemoryModeNetSize ); runOnce( 0 ); #ifdef NEOML_USE_FINEOBJ } catch( CCheckException* exception ) { diff --git a/NeoML/src/Dnn/DnnBlob.cpp b/NeoML/src/Dnn/DnnBlob.cpp index 871fb8aef..c3507c6ab 100644 --- a/NeoML/src/Dnn/DnnBlob.cpp +++ b/NeoML/src/Dnn/DnnBlob.cpp @@ -167,7 +167,7 @@ void CDnnBlob::initializeByPattern(TBlobType type, const CBlobDesc& pattern) { NeoAssert(desc.GetDataType() == CT_Invalid); - const int allocSize = pattern.MemorySize(); + const int allocSize = mathEngine.GetReuseMemoryMode() ? 
pattern.MemorySize() : pattern.BlobSize(); NeoAssert( allocSize >= pattern.BlobSize() ); switch(type) { case CT_Float: diff --git a/NeoMathEngine/src/MemoryEngineMixin.cpp b/NeoMathEngine/src/MemoryEngineMixin.cpp index 391a6bd34..3e9431bf2 100644 --- a/NeoMathEngine/src/MemoryEngineMixin.cpp +++ b/NeoMathEngine/src/MemoryEngineMixin.cpp @@ -51,8 +51,10 @@ void CMemoryEngineMixin::SetReuseMemoryMode( bool enable ) case MET_Metal: case MET_Vulkan: { - std::lock_guard lock( Mutex ); - MemoryPool->SetReuseMemoryMode( enable ); + if( enable != GetReuseMemoryMode() ) { + std::lock_guard lock( Mutex ); + MemoryPool->SetReuseMemoryMode( enable ); + } break; } default: From 387b47c59aa1dca41fcff23ae5b1eca9f6342a26 Mon Sep 17 00:00:00 2001 From: Kirill Golikov Date: Thu, 29 Aug 2024 13:53:06 +0200 Subject: [PATCH 4/4] [NeoML] use BlobDesc.MemorySize in CBaseLayer Signed-off-by: Kirill Golikov --- NeoML/src/Dnn/BaseLayer.cpp | 56 ++++++++++++++----------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/NeoML/src/Dnn/BaseLayer.cpp b/NeoML/src/Dnn/BaseLayer.cpp index 18065b3ec..a5df17030 100644 --- a/NeoML/src/Dnn/BaseLayer.cpp +++ b/NeoML/src/Dnn/BaseLayer.cpp @@ -263,13 +263,11 @@ void CBaseLayer::AllocateOutputBlobs() for( int i = 0; i < outputDescs.Size(); ++i ) { if( outputBlobs[i] == nullptr ) { outputBlobs[i] = CDnnBlob::CreateBlob( MathEngine(), outputDescs[i].GetDataType(), outputDescs[i] ); - } else { - if( !outputBlobs[i]->GetDesc().HasEqualDimensions( outputDescs[i] ) ) { - // If this output can be connected to in-place transform. And on the second run outputBlob's shape can mismatch with outputDesc. - // That's why now reinterpret it (because this layer can depend on outputBlob's shape). - // After that transform will change it again. - outputBlobs[i]->ReinterpretDimensions( outputDescs[i] ); - } + } else if( !outputBlobs[i]->GetDesc().HasEqualDimensions( outputDescs[i] ) ) { + // If this output can be connected to in-place transform. And on the second run outputBlob's shape can mismatch with outputDesc. + // That's why now reinterpret it (because this layer can depend on outputBlob's shape). + // After that transform will change it again. + outputBlobs[i]->ReinterpretDimensions( outputDescs[i] ); } } } @@ -410,15 +408,15 @@ void CBaseLayer::reshape() { NeoAssert( dnn != 0 ); // possible only in a network - if( !isReshapeNeeded && !forcedReshape) { + if( !isReshapeNeeded && !forcedReshape ) { return; } isReshapeNeeded = false; CArray prevInputDescs; - inputDescs.MoveTo( prevInputDescs ); - inputDescs.SetSize(inputs.Size()); - + inputDescs.CopyTo( prevInputDescs ); // do not delete, do not loose the desc's memorySize + inputDescs.SetSize( inputs.Size() ); // for the first time + // Call the input layers reshape recursively, reset the input blobs for( int i = 0; i < GetInputCount(); ++i ) { GetInputLayer(i)->reshape(); @@ -433,7 +431,7 @@ void CBaseLayer::reshape() if(!forcedReshape) { for(int i = 0; i < inputBlobs.Size(); i++) { - forcedReshape = forcedReshape + forcedReshape = forcedReshape || !inputDescs[i].HasEqualDimensions(prevInputDescs[i]); } } @@ -450,7 +448,6 @@ void CBaseLayer::reshape() for( int cacheType = 0; cacheType < BCT_Count; ++cacheType ) { blobCache[cacheType].DeleteAll(); } - outputDescs.SetSize( outputs.Size() ); inputDiffBlobs.DeleteAll(); @@ -466,6 +463,7 @@ void CBaseLayer::reshape() MathEngine().CleanUp(); } + // Define the outputDescs array Reshape(); blobsNeededForBackward = ( IsBackwardPerformed() ? 
BlobsForBackward() : 0 ) | ( IsLearningPerformed() ? BlobsForLearn() : 0 ); @@ -528,34 +526,22 @@ void CBaseLayer::runOnce() GetInputLayer(i)->runOnce(); } + const bool mayFreeIoBlobs = GetDnn()->isReuseMemoryMode + && ( !GetDnn()->isBackwardPerformed || !GetDnn()->IsRecurrentMode() || GetDnn()->IsLastSequencePos() + || ( ( blobsNeededForBackward & TInputBlobs ) == 0 && ( !isInPlace || ( blobsNeededForBackward & TOutputBlobs ) == 0 ) ) ); + // Either this is the first runOnce after reshape // or the input and output blobs are released directly after use for( int i = 0; i < inputBlobs.Size(); ++i ) { CBaseLayer* inputLayer = GetInputLayer( i ); const int outputNumber = inputs[i].OutputNumber; - CDnnBlob* prevLayerOutput = inputLayer->outputBlobs[outputNumber].Ptr(); + inputBlobs[i] = inputLayer->outputBlobs[outputNumber].Ptr(); - if( prevLayerOutput == inputBlobs[i].Ptr() ) { - continue; - } - - inputBlobs[i] = prevLayerOutput; - } - - const bool mayFreeIoBlobs = GetDnn()->isReuseMemoryMode - && ( !GetDnn()->isBackwardPerformed || !GetDnn()->IsRecurrentMode() || GetDnn()->IsLastSequencePos() - || ( ( blobsNeededForBackward & TInputBlobs ) == 0 && ( !isInPlace || ( blobsNeededForBackward & TOutputBlobs ) == 0 ) ) ); - - if( mayFreeIoBlobs ) { - for( int i = 0; i < inputBlobs.Size(); ++i ) { - CBaseLayer* inputLayer = GetInputLayer( i ); - const int outputNumber = inputs[i].OutputNumber; - - if( inputLayer->lastOutputUser[outputNumber] == this - && ( inputLayer->blobsNeededForBackward & TOutputBlobs ) == 0 ) - { - inputLayer->outputBlobs[outputNumber] = nullptr; - } + if( mayFreeIoBlobs + && inputLayer->lastOutputUser[outputNumber] == this + && ( inputLayer->blobsNeededForBackward & TOutputBlobs ) == 0 ) + { + inputLayer->outputBlobs[outputNumber] = nullptr; } }
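
The core idea behind patches 1 and 2 is that a descriptor remembers the largest element count it has ever described (memorySize), so a blob allocated from it can later be reinterpreted to any shape that is not larger, without reallocating. Below is a minimal standalone sketch of that mechanism under stated assumptions: CDescSketch and its members are hypothetical stand-ins, not the actual NeoML CBlobDesc, which additionally tracks the seven named dimensions and the data type.

#include <cassert>
#include <initializer_list>
#include <vector>

class CDescSketch {
public:
    explicit CDescSketch( std::initializer_list<int> dims ) :
        dimensions( dims ), memorySize( 0 )
    {
        memorySize = blobSize();
    }

    // Current number of elements described
    int BlobSize() const { return blobSize(); }
    // The largest number of elements this descriptor has ever described
    int MemorySize() const { return memorySize; }

    void SetDimSize( int d, int size )
    {
        dimensions[d] = size;
        const int current = blobSize();
        if( current > memorySize ) {
            memorySize = current; // the high-water mark only grows
        }
    }

    // True if a buffer sized for `other` can be reinterpreted to hold this shape
    bool FitForReinterpretDimensions( const CDescSketch& other ) const
    {
        return BlobSize() <= other.MemorySize();
    }

private:
    std::vector<int> dimensions;
    int memorySize;

    int blobSize() const
    {
        int result = 1;
        for( int d : dimensions ) {
            result *= d;
        }
        return result;
    }
};

int main()
{
    CDescSketch desc{ 2, 3, 4 }; // 24 elements, MemorySize() == 24
    desc.SetDimSize( 0, 1 );     // now 12 elements, MemorySize() stays 24

    CDescSketch smaller{ 3, 4 }; // 12 elements
    // ReinterpretDimensions-style check: 12 <= 24, so the old allocation can be reused
    assert( smaller.FitForReinterpretDimensions( desc ) );
    return 0;
}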
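
Patch 3 reworks CMemoryModeSwitcher into a guard that records the math engine's current reuse-memory mode, toggles it only when the requested mode is not already active, and restores it on destruction. The following is a hedged sketch of that RAII pattern against a hypothetical IEngine interface (in NeoML the real calls are IMathEngine::GetReuseMemoryMode / SetReuseMemoryMode); CReuseModeSwitcherSketch and CFakeEngine are illustrative names only.

#include <cstdio>

// Hypothetical stand-in for the reuse-memory part of IMathEngine
struct IEngine {
    virtual bool GetReuseMemoryMode() const = 0;
    virtual void SetReuseMemoryMode( bool enable ) = 0;
    virtual ~IEngine() = default;
};

class CReuseModeSwitcherSketch {
public:
    CReuseModeSwitcherSketch( IEngine& _engine, bool _need ) :
        engine( _engine ), need( _need ), mode( _engine.GetReuseMemoryMode() )
    {
        if( need && !mode ) { // same condition as `need > mode` in the patch
            engine.SetReuseMemoryMode( true );
        }
    }
    ~CReuseModeSwitcherSketch()
    {
        if( need && !mode ) { // restore only what this object actually changed
            engine.SetReuseMemoryMode( false );
        }
    }

private:
    IEngine& engine;
    const bool need; // the caller wants reuse enabled inside this scope
    const bool mode; // the mode that was active before the switcher was created
};

// Trivial engine used only to demonstrate the switcher
struct CFakeEngine : public IEngine {
    bool reuse = false;
    bool GetReuseMemoryMode() const override { return reuse; }
    void SetReuseMemoryMode( bool enable ) override { reuse = enable; }
};

int main()
{
    CFakeEngine engine;
    {
        CReuseModeSwitcherSketch switcher( engine, /*need*/ true );
        std::printf( "inside scope: %d\n", engine.GetReuseMemoryMode() ); // prints 1
    }
    std::printf( "after scope: %d\n", engine.GetReuseMemoryMode() ); // restored to 0
    return 0;
}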