Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NeoML] CDnnBlob uses the empirically better size #1054

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NeoML/include/NeoML/Dnn/DnnBlob.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class NEOML_API CDnnBlob : public IObject {

// Changes the blob dimensions "names" without moving the data
// In effect, only the blob description is changed
// As the data is unaffected, the total blob size specified by the new descriptor should be the same
// As the data is unaffected, the total blob size specified by the new descriptor should be less than or the same as the original
void ReinterpretDimensions( const CBlobDesc& newDesc );

// Merges blobs along the given dimension
Expand Down
64 changes: 26 additions & 38 deletions NeoML/src/Dnn/BaseLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,15 @@ bool CBaseLayer::InputsMayBeOverwritten() const
// The class that switches memory reuse mode
class CMemoryModeSwitcher {
public:
explicit CMemoryModeSwitcher( IMathEngine& _mathEngine, bool _need ) : mathEngine( _mathEngine ), need( _need )
{ if( need ) { mathEngine.SetReuseMemoryMode( true ); } }
explicit CMemoryModeSwitcher( IMathEngine& _mathEngine, bool _need ) :
mathEngine( _mathEngine ), need( _need ), mode( mathEngine.GetReuseMemoryMode() )
{ if( need > mode ) { mathEngine.SetReuseMemoryMode( true ); } }
~CMemoryModeSwitcher()
{ if( need ) { mathEngine.SetReuseMemoryMode( false ); } }
{ if( need > mode ) { mathEngine.SetReuseMemoryMode( false ); } }
public:
IMathEngine& mathEngine;
bool need;
bool mode;
};

void CBaseLayer::AllocateOutputBlobs()
Expand All @@ -261,13 +263,11 @@ void CBaseLayer::AllocateOutputBlobs()
for( int i = 0; i < outputDescs.Size(); ++i ) {
if( outputBlobs[i] == nullptr ) {
outputBlobs[i] = CDnnBlob::CreateBlob( MathEngine(), outputDescs[i].GetDataType(), outputDescs[i] );
} else {
if( !outputBlobs[i]->GetDesc().HasEqualDimensions( outputDescs[i] ) ) {
// If this output can be connected to in-place transform. And on the second run outputBlob's shape can mismatch with outputDesc.
// That's why now reinterpret it (because this layer can depend on outputBlob's shape).
// After that transform will change it again.
outputBlobs[i]->ReinterpretDimensions( outputDescs[i] );
}
} else if( !outputBlobs[i]->GetDesc().HasEqualDimensions( outputDescs[i] ) ) {
// This output may be connected to an in-place transform, so on the second run
// the outputBlob's shape can mismatch with outputDesc. Reinterpret it now
// (because this layer may depend on the outputBlob's shape);
outputBlobs[i]->ReinterpretDimensions( outputDescs[i] );
}
}
}
Expand Down Expand Up @@ -408,15 +408,15 @@ void CBaseLayer::reshape()
{
NeoAssert( dnn != 0 ); // possible only in a network

if( !isReshapeNeeded && !forcedReshape) {
if( !isReshapeNeeded && !forcedReshape ) {
return;
}
isReshapeNeeded = false;

CArray<CBlobDesc> prevInputDescs;
inputDescs.MoveTo( prevInputDescs );
inputDescs.SetSize(inputs.Size());
inputDescs.CopyTo( prevInputDescs ); // do not delete, do not lose the desc's memorySize
inputDescs.SetSize( inputs.Size() ); // for the first time

// Call the input layers reshape recursively, reset the input blobs
for( int i = 0; i < GetInputCount(); ++i ) {
GetInputLayer(i)->reshape();
Expand All @@ -431,7 +431,7 @@ void CBaseLayer::reshape()

if(!forcedReshape) {
for(int i = 0; i < inputBlobs.Size(); i++) {
forcedReshape = forcedReshape
forcedReshape = forcedReshape
|| !inputDescs[i].HasEqualDimensions(prevInputDescs[i]);
}
}
Expand All @@ -448,7 +448,6 @@ void CBaseLayer::reshape()
for( int cacheType = 0; cacheType < BCT_Count; ++cacheType ) {
blobCache[cacheType].DeleteAll();
}

outputDescs.SetSize( outputs.Size() );

inputDiffBlobs.DeleteAll();
Expand All @@ -464,6 +463,7 @@ void CBaseLayer::reshape()
MathEngine().CleanUp();
}

// Define the outputDescs array
Reshape();
blobsNeededForBackward = ( IsBackwardPerformed() ? BlobsForBackward() : 0 )
| ( IsLearningPerformed() ? BlobsForLearn() : 0 );
Expand Down Expand Up @@ -526,34 +526,22 @@ void CBaseLayer::runOnce()
GetInputLayer(i)->runOnce();
}

const bool mayFreeIoBlobs = GetDnn()->isReuseMemoryMode
&& ( !GetDnn()->isBackwardPerformed || !GetDnn()->IsRecurrentMode() || GetDnn()->IsLastSequencePos()
|| ( ( blobsNeededForBackward & TInputBlobs ) == 0 && ( !isInPlace || ( blobsNeededForBackward & TOutputBlobs ) == 0 ) ) );

// Either this is the first runOnce after reshape
// or the input and output blobs are released directly after use
for( int i = 0; i < inputBlobs.Size(); ++i ) {
CBaseLayer* inputLayer = GetInputLayer( i );
const int outputNumber = inputs[i].OutputNumber;
CDnnBlob* prevLayerOutput = inputLayer->outputBlobs[outputNumber].Ptr();
inputBlobs[i] = inputLayer->outputBlobs[outputNumber].Ptr();

if( prevLayerOutput == inputBlobs[i].Ptr() ) {
continue;
}

inputBlobs[i] = prevLayerOutput;
}

const bool mayFreeIoBlobs = GetDnn()->isReuseMemoryMode
&& ( !GetDnn()->isBackwardPerformed || !GetDnn()->IsRecurrentMode() || GetDnn()->IsLastSequencePos()
|| ( ( blobsNeededForBackward & TInputBlobs ) == 0 && ( !isInPlace || ( blobsNeededForBackward & TOutputBlobs ) == 0 ) ) );

if( mayFreeIoBlobs ) {
for( int i = 0; i < inputBlobs.Size(); ++i ) {
CBaseLayer* inputLayer = GetInputLayer( i );
const int outputNumber = inputs[i].OutputNumber;

if( inputLayer->lastOutputUser[outputNumber] == this
&& ( inputLayer->blobsNeededForBackward & TOutputBlobs ) == 0 )
{
inputLayer->outputBlobs[outputNumber] = nullptr;
}
if( mayFreeIoBlobs
&& inputLayer->lastOutputUser[outputNumber] == this
&& ( inputLayer->blobsNeededForBackward & TOutputBlobs ) == 0 )
{
inputLayer->outputBlobs[outputNumber] = nullptr;
}
}

Expand Down
4 changes: 2 additions & 2 deletions NeoML/src/Dnn/Dnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ CDnn::CDnn( CRandom& _random, IMathEngine& _mathEngine, const CCompositeLayer* o
currentSequencePos( 0 ),
isReverseSequense( false ),
autoRestartMode( true ),
isReuseMemoryMode( false )
isReuseMemoryMode( mathEngine.GetReuseMemoryMode() )
{
solver = FINE_DEBUG_NEW CDnnSimpleGradientSolver( mathEngine );
initializer = FINE_DEBUG_NEW CDnnXavierInitializer( random );
Expand Down Expand Up @@ -648,7 +648,7 @@ void CDnn::RunOnce()
reshape(); // rebuild the network if necessary

// During inference we turn on reuseMemoryMode when the net is big enough
isReuseMemoryMode = ( getOutputBlobsSize() > MinReuseMemoryModeNetSize );
isReuseMemoryMode = mathEngine.GetReuseMemoryMode() || ( getOutputBlobsSize() > MinReuseMemoryModeNetSize );
runOnce( 0 );
#ifdef NEOML_USE_FINEOBJ
} catch( CCheckException* exception ) {
Expand Down
11 changes: 6 additions & 5 deletions NeoML/src/Dnn/DnnBlob.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,14 @@ void CDnnBlob::initializeByPattern(TBlobType type, const CBlobDesc& pattern)
{
NeoAssert(desc.GetDataType() == CT_Invalid);

const int size = pattern.BlobSize();
const int allocSize = mathEngine.GetReuseMemoryMode() ? pattern.MemorySize() : pattern.BlobSize();
NeoAssert( allocSize >= pattern.BlobSize() );
switch(type) {
case CT_Float:
data = mathEngine.HeapAllocTyped<float>( size );
data = mathEngine.HeapAllocTyped<float>( allocSize );
break;
case CT_Int:
data = mathEngine.HeapAllocTyped<int>( size );
data = mathEngine.HeapAllocTyped<int>( allocSize );
break;
default:
NeoAssert( false );
Expand Down Expand Up @@ -381,11 +382,11 @@ void CDnnBlob::TransposeFrom(const CDnnBlob* other, int _d1, int _d2)

// Changes the blob dimensions "names" without moving the data
// In effect, only the blob descriptor is changed
// As the data is unaffected, the total blob size specified by the new descriptor should be the same
// As the data is unaffected, the total blob size specified by the new descriptor should be less than or the same as the original
void CDnnBlob::ReinterpretDimensions( const CBlobDesc& newDesc )
{
NeoAssert( parent == 0 );
NeoAssert( newDesc.BlobSize() == desc.BlobSize() );
NeoAssert( newDesc.BlobSize() <= desc.MemorySize() );

desc = newDesc;
}
Expand Down
48 changes: 45 additions & 3 deletions NeoMathEngine/include/NeoMathEngine/BlobDesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ class NEOMATHENGINE_API CBlobDesc final {
explicit CBlobDesc( TBlobType dataType = CT_Invalid );
CBlobDesc( std::initializer_list<int> list );

CBlobDesc( CBlobDesc&& ) = default;
CBlobDesc( const CBlobDesc& ) = default;

CBlobDesc& operator=( CBlobDesc&& ) = default;
CBlobDesc& operator=( const CBlobDesc& other );

bool operator==( const CBlobDesc& other ) const { return type == other.type && HasEqualDimensions( other ); }
bool operator!=( const CBlobDesc& other ) const { return !( *this == other ); }

// The maximum possible sequence length for a recurrent network
int BatchLength() const { return dimensions[BD_BatchLength]; }
// The number of sequences in the blob
Expand All @@ -75,6 +84,8 @@ class NEOMATHENGINE_API CBlobDesc final {
int Channels() const { return dimensions[BD_Channels]; }
// The blob size, in elements
int BlobSize() const;
// The empirically better size for this blob, in elements
int MemorySize() const { return memorySize; }
// The size of one object in the blob
int ObjectSize() const { return Height() * Width() * Depth() * Channels(); }
// The number of objects in the blob
Expand All @@ -85,8 +96,10 @@ class NEOMATHENGINE_API CBlobDesc final {
// The size of the dimension with a given index
int DimSize( int d ) const { return dimensions[d]; }
// Sets the size of the dimension with a given index
void SetDimSize( int d, int size ) { dimensions[d] = size; }
void SetDimSize( int d, int size );

// If memory size of original blob >= required, the dimensions could be reinterpreted
bool FitForReinterpretDimensions( const CBlobDesc& other ) const;
// Checks if the described blob has the same dimensions
bool HasEqualDimensions( const CBlobDesc& other ) const;
bool HasEqualDimensions( const CBlobDesc& other, std::initializer_list<int> dimensions ) const;
Expand All @@ -99,20 +112,25 @@ class NEOMATHENGINE_API CBlobDesc final {
private:
int dimensions[MaxDimensions]{};
TBlobType type = CT_Invalid;
int memorySize = 0; // empirically better size for this blob, count in elements

void setMemorySize( int blobSize ) { memorySize = ( memorySize > blobSize ) ? memorySize : blobSize; }
};

//---------------------------------------------------------------------------------------------------------------------

inline CBlobDesc::CBlobDesc( TBlobType dataType ) :
type( dataType )
type( dataType ),
memorySize( 1 )
{
for( int i = 0; i < MaxDimensions; i++ ) {
dimensions[i] = 1;
}
}

inline CBlobDesc::CBlobDesc( std::initializer_list<int> list ) :
type( CT_Float )
type( CT_Float ),
memorySize( 0 )
{
int i = BD_Count - 1;
int j = static_cast<int>( list.size() ) - 1;
Expand All @@ -127,6 +145,19 @@ inline CBlobDesc::CBlobDesc( std::initializer_list<int> list ) :
dimensions[i] = 1;
i--;
}
setMemorySize( BlobSize() );
}

// Copy assignment: copies the dimensions and data type of `other`, while keeping
// this descriptor's memorySize as a running maximum — it can only grow, never shrink.
// NOTE(review): only other.BlobSize() (not other.MemorySize()) is folded in, so a
// larger reserve recorded in `other` is not propagated — confirm this is intended.
inline CBlobDesc& CBlobDesc::operator=( const CBlobDesc& other )
{
	if( this == &other ) {
		return *this; // self-assignment: nothing to do
	}
	for( int dim = 0; dim < MaxDimensions; ++dim ) {
		dimensions[dim] = other.dimensions[dim];
	}
	type = other.type;
	setMemorySize( other.BlobSize() );
	return *this;
}

inline int CBlobDesc::BlobSize() const
Expand All @@ -138,6 +169,17 @@ inline int CBlobDesc::BlobSize() const
return blobSize;
}

inline void CBlobDesc::SetDimSize( int d, int size )
{
dimensions[d] = size;
setMemorySize( BlobSize() );
}

// Returns true when this descriptor's dimensions can safely reinterpret the
// memory of `other`, i.e. this blob needs no more elements than `other` has reserved.
inline bool CBlobDesc::FitForReinterpretDimensions( const CBlobDesc& other ) const
{
	const int requiredElements = BlobSize();
	const int reservedElements = other.MemorySize();
	return requiredElements <= reservedElements;
}

inline bool CBlobDesc::HasEqualDimensions( const CBlobDesc& other ) const
{
for( TBlobDim d = TBlobDim( 0 ); d < BD_Count; ++d ) {
Expand Down
6 changes: 4 additions & 2 deletions NeoMathEngine/src/MemoryEngineMixin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ void CMemoryEngineMixin::SetReuseMemoryMode( bool enable )
case MET_Metal:
case MET_Vulkan:
{
std::lock_guard<std::mutex> lock( Mutex );
MemoryPool->SetReuseMemoryMode( enable );
if( enable != GetReuseMemoryMode() ) {
std::lock_guard<std::mutex> lock( Mutex );
MemoryPool->SetReuseMemoryMode( enable );
}
break;
}
default:
Expand Down