Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] introduce CUDA managed memory and use it for a matching function #157

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/application/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,24 @@ int main(int argc, char **argv)
cout << "Number of features: " << rFeatures->getFeatureCount() << endl;
cout << "Number of descriptors: " << rFeatures->getDescriptorCount() << endl;

lFeatures->match( rFeatures );
int3* matches = lFeatures->matchAndReturn( rFeatures );
// lFeatures->match( rFeatures );
cudaDeviceSynchronize();

for( int i=0; i<lFeatures->getDescriptorCount(); i++ )
{
int3& match = matches[i];
if( match.z )
{
const popsift::Feature* l_f = lFeatures->getFeatureForDescriptor( i );
const popsift::Feature* r_f = rFeatures->getFeatureForDescriptor( match.x );
cout << setprecision(5) << showpoint
<< "point (" << l_f->xpos << "," << l_f->ypos << ") in l matches "
<< "point (" << r_f->xpos << "," << r_f->ypos << ") in r" << endl;
}
}

lFeatures->freeMatches( matches );

delete lFeatures;
delete rFeatures;
Expand Down
17 changes: 17 additions & 0 deletions src/popsift/common/debug_macros.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,23 @@ void malloc_hst( void** ptr, int sz,
}
} }

namespace popsift { namespace cuda {
void malloc_mgd( void** ptr, int sz,
const char* file, int line )
{
cudaError_t err;
err = cudaMallocManaged( ptr, sz );
if( err != cudaSuccess ) {
std::cerr << file << ":" << line << std::endl
<< " cudaMallocManaged failed: " << cudaGetErrorString(err) << std::endl;
exit( -__LINE__ );
}
#ifdef DEBUG_INIT_DEVICE_ALLOCATIONS
memset( *ptr, 0, sz );
#endif // NDEBUG
}
} }

namespace popsift { namespace cuda {
void memcpy_async( void* dst, const void* src, size_t sz,
cudaMemcpyKind type, cudaStream_t stream,
Expand Down
14 changes: 14 additions & 0 deletions src/popsift/common/debug_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ void malloc_dev( void** ptr, int sz,
void malloc_hst( void** ptr, int sz,
const char* file, int line );

void malloc_mgd( void** ptr, int sz,
const char* file, int line );

template<class T>
T* malloc_devT( int num, const char* file, int line ) {
void* ptr;
Expand All @@ -53,6 +56,17 @@ T* malloc_hstT( int num, const char* file, int line ) {
return (T*)ptr;
}

template<class T>
T* malloc_mgdT( int num, const char* file, int line ) {
void* ptr;
malloc_mgd( &ptr, num*sizeof(T), file, line );
return (T*)ptr;
}

inline void free_mgd( void* ptr ) {
cudaFree( ptr );
}

void memcpy_sync( void* dst, const void* src, size_t sz,
cudaMemcpyKind type,
const char* file, size_t line );
Expand Down
66 changes: 61 additions & 5 deletions src/popsift/features.cu
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ void FeaturesDev::reset( int num_ext, int num_ori )
if( _ori != nullptr ) { cudaFree( _ori ); _ori = nullptr; }
if( _rev != nullptr ) { cudaFree( _rev ); _rev = nullptr; }

_ext = popsift::cuda::malloc_devT<Feature> ( num_ext, __FILE__, __LINE__ );
_ori = popsift::cuda::malloc_devT<Descriptor>( num_ori, __FILE__, __LINE__ );
_rev = popsift::cuda::malloc_devT<int> ( num_ori, __FILE__, __LINE__ );
_ext = popsift::cuda::malloc_mgdT<Feature> ( num_ext, __FILE__, __LINE__ );
_ori = popsift::cuda::malloc_mgdT<Descriptor>( num_ori, __FILE__, __LINE__ );
_rev = popsift::cuda::malloc_mgdT<int> ( num_ori, __FILE__, __LINE__ );

setFeatureCount( num_ext );
setDescriptorCount( num_ori );
Expand Down Expand Up @@ -248,18 +248,26 @@ show_distance( int3* match_matrix,
if( threadIdx.x == 0 )
{
if( match_matrix[i].z )
printf( "accept feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f\n",
{
Feature* lx = &l_ext[l_fem[i]];
Feature* rx = &r_ext[r_fem[match_matrix[i].x]];
printf( "accept feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f"
" (%.1f,%.1f)-(%.1f,%.1f)\n",
l_fem[i], i,
r_fem[match_matrix[i].x], match_matrix[i].x,
r_fem[match_matrix[i].y], match_matrix[i].y,
d1, d2 );
d1, d2,
lx->xpos, lx->ypos, rx->xpos, rx->ypos );
}
else
{
printf( "reject feat %4d [%4d] matches feat %4d [%4d] ( 2nd feat %4d [%4d] ) dist %.3f vs %.3f\n",
l_fem[i], i,
r_fem[match_matrix[i].x], match_matrix[i].x,
r_fem[match_matrix[i].y], match_matrix[i].y,
d1, d2 );
}
}
__syncthreads();
}
}
Expand Down Expand Up @@ -303,6 +311,54 @@ void FeaturesDev::match( FeaturesDev* other )
cudaFree( match_matrix );
}

int3* FeaturesDev::matchAndReturn( FeaturesDev* other )
{
int l_len = getDescriptorCount( );
int r_len = other->getDescriptorCount( );

int3* match_matrix = popsift::cuda::malloc_mgdT<int3>( l_len, __FILE__, __LINE__ );

dim3 grid;
grid.x = l_len;
grid.y = 1;
grid.z = 1;
dim3 block;
block.x = 32;
block.y = 1;
block.z = 1;

compute_distance
<<<grid,block>>>
( match_matrix, getDescriptors(), l_len, other->getDescriptors(), r_len );

return match_matrix;
}

void FeaturesDev::freeMatches( int3* match_matrix )
{
popsift::cuda::free_mgd( match_matrix );
}

Descriptor* FeaturesDev::getDescriptor( int descIndex )
{
return &_ori[descIndex];
}

const Descriptor* FeaturesDev::getDescriptor( int descIndex ) const
{
return &_ori[descIndex];
}

Feature* FeaturesDev::getFeatureForDescriptor( int descIndex )
{
return &_ext[_rev[descIndex]];
}

const Feature* FeaturesDev::getFeatureForDescriptor( int descIndex ) const
{
return &_ext[_rev[descIndex]];
}

/*************************************************************
* Feature
*************************************************************/
Expand Down
33 changes: 31 additions & 2 deletions src/popsift/features.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ std::ostream& operator<<( std::ostream& ostr, const FeaturesHost& feature );

class FeaturesDev : public FeaturesBase
{
Feature* _ext;
Descriptor* _ori;
Feature* _ext; // array of extrema
Descriptor* _ori; // array of desciptors
int* _rev; // the reverse map from descriptors to extrema

public:
Expand All @@ -114,11 +114,40 @@ class FeaturesDev : public FeaturesBase

void reset( int num_ext, int num_ori );

/** This function performs one-directional brute force matching on
* the GPU between the Descriptors in this objects and the object
* other.
* The resulting matches are printed.
*/
void match( FeaturesDev* other );

/** This function performs one-directional brute force matching on
* the GPU between the Descriptors in this objects and the object
* other.
* The resulting matches are returned in an array of int3 that must
* be released with a call to cudaFree().
* The length of the array is this->getDescriptorCount().
* For each element at position i
* i is the index of a descriptor in this->getDescriptors()
* int3.x is the index of the best match in other->getDescriptors()
* int3.y is the index of the second best match in other->getDescriptors()
* int3.z indicates if the match is valid (non-zero) or not (zero)
*/
int3* matchAndReturn( FeaturesDev* other );

/** This function takes as parameters that matches returned by
* matchAndReturn and releases that memory.
*/
void freeMatches( int3* match_matrix );

inline Feature* getFeatures() { return _ext; }
inline Descriptor* getDescriptors() { return _ori; }
inline int* getReverseMap() { return _rev; }

Descriptor* getDescriptor( int descIndex );
const Descriptor* getDescriptor( int descIndex ) const;
Feature* getFeatureForDescriptor( int descIndex );
const Feature* getFeatureForDescriptor( int descIndex ) const;
};

} // namespace popsift
Loading