Ponca  eff04be8be0ed1ccd36b694a34ae55d988e046fb
Point Cloud Analysis library
Loading...
Searching...
No Matches
Object-space Curvature using Cuda/C++ and KdTree queries

Introduction

This is an example that use Ponca to compute curvatures in C++ using Cuda, and querying neighbors on the GPU using a KdTree.

To compile and run the example, call

cd build && make cuda_fit_plane_kdtree
cd examples/cuda && ./cuda_fit_plane_kdtree

Source code

#include <Ponca/src/Fitting/basket.h>
#include <Ponca/src/Fitting/covariancePlaneFit.h>
#include <Ponca/src/Fitting/meanPlaneFit.h>
#include <Ponca/src/Fitting/weightFunc.h>
#include <Ponca/src/Fitting/weightKernel.h>
#include <Ponca/src/Common/pointTypes.h>
#include <Ponca/src/Common/pointGeneration.h>
#include <Ponca/src/SpatialPartitioning/KdTree/kdTree.h>
#include <Ponca/src/SpatialPartitioning/KdTree/kdTreeTraits.h>
#include <iostream>
#include "cuda_utils.cu"
/*! \brief Computes the fitting process for each point of the point cloud and returns the potential and primitive gradient result.
*
* \tparam DataPoint The DataPoint type.
* \tparam Fit The Fit that will be computed by the Kernel.
* \param buffers The buffers structure that holds the internal containers of the KdTree.
* \param analysisScale The radius of the neighborhood.
* \param potentialResults As an Output, the potential results of the fit for each point of the Point Cloud.
* \param gradientResults As an Output, the primitiveGradient results of the fit for each point of the Point Cloud.
*/
template<typename DataPoint, typename Fit>
__global__ void fitPotentialAndGradientKernel(
typename KdTreeGPU<DataPoint>::Buffers* const buffers,
const typename DataPoint::Scalar analysisScale,
typename DataPoint::Scalar* const potentialResults,
typename DataPoint::Scalar* const gradientResults
) {
using VectorType = typename DataPoint::VectorType;
// Get global index
const unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;
// Skip when not in the point cloud
if (i >= buffers->points_size) return;
//! [Create KdTree on the GPU]
KdTreeGPU<DataPoint> kdtree(*buffers);
//! [Create KdTree on the GPU]
// Set up the fit
Fit fit;
VectorType pos = kdtree.points()[i].pos();
fit.setNeighborFilter({ pos, analysisScale });
//! [Use KdTree on the GPU]
fit.computeWithIds(kdtree.rangeNeighbors(i, analysisScale), kdtree.points());
//! [Use KdTree on the GPU]
// Returns NaN if not stable
if (! fit.isStable()) {
potentialResults[i] = NAN;
for (int d = 0; d < DataPoint::Dim; ++d) {
gradientResults[i*DataPoint::Dim + d] = NAN;
}
return;
}
// Return the fit.potential result as an output
potentialResults[i] = fit.potential(pos);
const VectorType grad = fit.primitiveGradient(pos);
for (int d = 0; d < DataPoint::Dim; ++d) {
gradientResults[i*DataPoint::Dim + d] = grad(d);
}
}
/*! \brief Test a MeanPlaneFit on a plane using the CUDA kernel
*
* \tparam Scalar The scalar type (e.g. double, float or long double...)
* \tparam Dim The number of dimension that the VectorType will have.
* \param _bUnoriented Generates an unoriented point cloud.
* \param _bAddPositionNoise Determines if we add a randomly generated offset to the position.
* \param _bAddNormalNoise Determines if we add a randomly generated offset to the normal.
*/
template<typename Scalar, int Dim>
__host__ void testPlaneCuda(
const bool _bUnoriented = false,
const bool _bAddPositionNoise = false,
const bool _bAddNormalNoise = false
) {
typedef Ponca::PointPositionNormal<Scalar, Dim> DataPoint;
typedef Ponca::DistWeightFunc<DataPoint, Ponca::SmoothWeightKernel<Scalar> > WeightSmoothFunc;
typedef Ponca::Basket<DataPoint, WeightSmoothFunc, Ponca::MeanPlaneFit> MeanFitSmooth;
typedef typename DataPoint::VectorType VectorType;
// Point cloud parameters for the plane
const unsigned int nbPoints = Eigen::internal::random<int>(100, 1000);
const Scalar width = Eigen::internal::random<Scalar>(1., 10.);
const Scalar height = width;
const Scalar analysisScale = Scalar(15.) * std::sqrt( width * height / nbPoints);
const Scalar centerScale = Eigen::internal::random<Scalar>(1, 10000);
const VectorType center = VectorType::Random() * centerScale;
const VectorType direction = VectorType::Random().normalized();
// Generate the point cloud
std::vector<DataPoint> points(nbPoints);
for(unsigned int i = 0; i < nbPoints; ++i) {
points[i] = Ponca::getPointOnPlane<DataPoint>(
center, direction, width,
_bAddPositionNoise, _bAddNormalNoise, _bUnoriented
);
}
//! [Build KdTree on CPU]
Ponca::KdTreeDense<DataPoint> kdtree(points);
//! [Build KdTree on CPU]
std::cout << "Number of nodes in the KdTree : " << kdtree.nodeCount() << std::endl;
// The size of the data we send between Host and Device
const unsigned long scalarBufferSize = nbPoints * sizeof(Scalar);
const unsigned long vectorBufferSize = scalarBufferSize * Dim;
//! [Copy KdTree on GPU]
using BuffersGPU = typename KdTreeGPU<DataPoint>::Buffers;
BuffersGPU* kdtreeBuffersDevice;
CUDA_CHECK(cudaMalloc(&kdtreeBuffersDevice, sizeof(BuffersGPU)));
BuffersGPU hostBuffersHoldingDevicePointers; // Host Buffers referencing data on the device, used to free memory
deepCopyBuffersToDevice<Ponca::KdTreePointerTraits<DataPoint>>(kdtree.buffers(), hostBuffersHoldingDevicePointers, kdtreeBuffersDevice);
//! [Copy KdTree on GPU]
// Prepare output buffers
auto* const potentialResults = new Scalar[nbPoints];
auto* const gradientResults = new Scalar[nbPoints*Dim];
Scalar* potentialResultsDevice;
Scalar* gradientResultsDevice;
CUDA_CHECK(cudaMalloc(&potentialResultsDevice, scalarBufferSize));
CUDA_CHECK(cudaMalloc(&gradientResultsDevice , vectorBufferSize));
// Set block and grid size depending on number of points
constexpr unsigned int blockSize = 128;
const unsigned int gridSize = (nbPoints + blockSize - 1) / blockSize;
// Compute the fitting in the kernel
fitPotentialAndGradientKernel<DataPoint, MeanFitSmooth><<<gridSize, blockSize>>>(
kdtreeBuffersDevice, analysisScale, // Inputs
potentialResultsDevice, gradientResultsDevice // Outputs
);
CUDA_CHECK(cudaGetLastError()); // Catch kernel launch errors
CUDA_CHECK(cudaDeviceSynchronize());
// Fetch the results (Device to Host)
CUDA_CHECK(cudaMemcpy(potentialResults, potentialResultsDevice, scalarBufferSize, cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaMemcpy(gradientResults , gradientResultsDevice , vectorBufferSize, cudaMemcpyDeviceToHost));
// Free CUDA memory
CUDA_CHECK(cudaFree(potentialResultsDevice));
CUDA_CHECK(cudaFree(gradientResultsDevice));
freeBuffersOnDevice(hostBuffersHoldingDevicePointers);
CUDA_CHECK(cudaFree(kdtreeBuffersDevice));
// Validate results
const auto epsilon = Scalar(0.001);
for (int i = 0; i < nbPoints; ++i) {
const VectorType primGrad = Eigen::Map< const VectorType >(gradientResults + Dim*i );
std::cout << "i:" << i << ", potential:" << potentialResults[i] << ", ";
std::cout << "primitiveGradient:" << primGrad.transpose() << " ; " << std::endl;
if (std::abs(potentialResults[i]) > epsilon || Scalar(1.) - std::abs(primGrad.dot(direction)) > epsilon)
{
std::cerr << "Test failed in " << __FILE__ << " (" << __LINE__ << ")" << std::endl;
abort();
}
}
delete[] potentialResults;
delete[] gradientResults;
}
__host__ int main(const int /*argc*/, char** /*argv*/) {
std::cout << "Test plane fitting on CUDA..." << std::endl;
testPlaneCuda<float, 3>();
std::cout << "(ok)" << std::endl;
}