Ponca  4cffe9d86bf544d7691ffcacd98b5202d7de2aab
Point Cloud Analysis library
Loading...
Searching...
No Matches
Screen Space Curvature using Cuda/C++

Introduction

This example uses Ponca to compute screen-space curvature in C++ with CUDA.

Installation and usage

This example requires the following third-party libraries:

  • libpng
  • cuda (tested with 9.2).

To compile and run the example, call

cd build && make ssgls
cd examples/cuda && ./ponca_ssgls

This will take the two input pictures shipped with this example (in the "data" directory) and compute the curvature using a circular screen-space neighborhood with a radius of 10 pixels.

Screen-Space Curvature typical input. Left: world coordinates. Right: remapped normal vectors

It will generate this picture

Screen-Space Curvature estimation

Cuda programming

Here are the technical details related to the CUDA and C++ binding for screen-space curvature estimation.

Define fitting data structure

/*!
 * \brief Sample type handed to the fitting procedure.
 *
 * A sample bundles a 3D position, a 3D normal and a 2D screen-space
 * coordinate. Every attribute is reachable through a read-only and a
 * mutable accessor.
 */
class ScreenSpacePoint
{
public:
    enum
    {
        Dim = 3 //!< Number of components of positions and normals
    };

    using Scalar = float;
    using VectorType = Eigen::Matrix<Scalar, Dim, 1>;
    using ScreenVectorType = Eigen::Matrix<Scalar, 2, 1>;
    using MatrixType = Eigen::Matrix<Scalar, Dim, Dim>;

    /*!
     * \brief Build a sample; every attribute defaults to the zero vector.
     * \param _pos    3D position
     * \param _normal 3D normal
     * \param _spos   2D screen-space coordinate
     */
    PONCA_MULTIARCH inline ScreenSpacePoint(const VectorType& _pos = VectorType::Zero(),
                                            const VectorType& _normal = VectorType::Zero(),
                                            const ScreenVectorType& _spos = ScreenVectorType::Zero())
        : m_pos(_pos), m_normal(_normal), m_spos(_spos)
    {
    }

    //! 3D position (read-only, then mutable)
    PONCA_MULTIARCH inline const VectorType& pos() const { return m_pos; }
    PONCA_MULTIARCH inline VectorType& pos() { return m_pos; }

    //! 3D normal (read-only, then mutable)
    PONCA_MULTIARCH inline const VectorType& normal() const { return m_normal; }
    PONCA_MULTIARCH inline VectorType& normal() { return m_normal; }

    //! 2D screen-space coordinate (read-only, then mutable)
    PONCA_MULTIARCH inline const ScreenVectorType& spos() const { return m_spos; }
    PONCA_MULTIARCH inline ScreenVectorType& spos() { return m_spos; }

private:
    VectorType m_pos;
    VectorType m_normal;
    ScreenVectorType m_spos;
};

Define weighting functions

/*!
 * \brief Weighting function driven by the screen-space coordinates of the samples.
 *
 * The weight kernel is evaluated on the norm of the sample's screen-space
 * coordinate divided by the support size \c m_t. A sample gets a null weight
 * when that norm exceeds \c m_t, or when the z-test is enabled (non-zero
 * \c m_dz) and the absolute z component of the relative position exceeds
 * \c m_dz.
 */
class ProjectedWeightFunc : public Ponca::DistWeightFunc<ScreenSpacePoint, Ponca::SmoothWeightKernel<Scalar>>
{
public:
    using Scalar = ScreenSpacePoint::Scalar;
    using VectorType = ScreenSpacePoint::VectorType;
    using Base = Ponca::DistWeightFunc<ScreenSpacePoint, Ponca::SmoothWeightKernel<Scalar>>;

    /*!
     * \param _evalPos Evaluation position, forwarded to the base class
     * \param _t       Support size, forwarded to the base class
     * \param _dz      Largest accepted absolute z difference; 0 disables the test
     */
    PONCA_MULTIARCH inline ProjectedWeightFunc(const VectorType& _evalPos = VectorType::Zero(),
                                               const Scalar& _t = Scalar(1.), const Scalar _dz = 0.f)
        : Base(_evalPos, _t), m_dz(_dz)
    {
    }

    /*!
     * \brief Weight of a sample.
     * \param _relativePos Sample position expressed relatively to the evaluation position
     * \param _attributes  Sample providing the screen-space coordinate
     * \return The weight paired with \p _relativePos, returned untouched
     */
    PONCA_MULTIARCH inline Base::WeightReturnType w(const VectorType& _relativePos,
                                                    const ScreenSpacePoint& _attributes) const
    {
        PONCA_MULTIARCH_STD_MATH(abs);
        const Scalar screenDist = _attributes.spos().norm();
        const Scalar zGap = abs(_relativePos[2]);

        const bool zTestEnabled = (m_dz != Scalar(0));
        const bool inSupport = !(screenDist > m_t) && !(zTestEnabled && zGap > m_dz);
        if (inSupport)
        {
            return {m_wk.f(screenDist / m_t), _relativePos};
        }
        return {Scalar(0.), _relativePos};
    }

private:
    Scalar m_dz; //!< z-difference threshold, 0 meaning "no test"
};

Define fitting primitive

// Fitting primitive used by the kernel: a Ponca::Basket assembled from the ScreenSpacePoint sample type,
// the ProjectedWeightFunc weighting function, and the OrientedSphereFit and GLSParam extensions.
using ScreenSpaceFit = Ponca::Basket<ScreenSpacePoint, ProjectedWeightFunc, Ponca::OrientedSphereFit, Ponca::GLSParam>;

Kernel

/*!
 * \brief Per-pixel screen-space curvature estimation.
 *
 * Expected launch: a 2D grid of 2D blocks in which thread (x, y) handles pixel
 * (x, y); threads falling outside the image return immediately. No shared
 * memory is used.
 *
 * For each pixel whose stored normal is non-zero, the samples located at less
 * than \p _scale pixels (and inside the image) are added to a ScreenSpaceFit,
 * and the resulting \c kappa() is written to \p _result. Pixels whose stored
 * normal is the zero vector get 0. Positions and normals are remapped with
 * \c 2*v-1 before being used.
 *
 * \param _imgw         Image width, in pixels
 * \param _imgh         Image height, in pixels
 * \param _scale        Radius of the neighborhood, in pixels
 * \param _maxDepthDiff Forwarded to the neighbor filter (presumably a ProjectedWeightFunc,
 *                      where 0 disables the z-test)
 * \param _positions    Position buffer, 3 interleaved floats per pixel (read through getVector)
 * \param _normals      Normal buffer, same layout as \p _positions
 * \param _result       Output buffer, one float per pixel
 */
__global__ void doGLS_kernel(int _imgw, int _imgh, int _scale, float _maxDepthDiff, const float* _positions,
                             const float* _normals, float* _result)
{
    const int x = int(blockIdx.x) * int(blockDim.x) + int(threadIdx.x);
    const int y = int(blockIdx.y) * int(blockDim.y) + int(threadIdx.y);
    if (x >= _imgw || y >= _imgh)
    {
        return;
    }
    const int idx = y * _imgw + x;

    // Nothing to fit where the normal is undefined
    if (getVector(x, y, _imgw, _imgh, _normals).squaredNorm() == 0.f)
    {
        _result[idx] = 0.f;
        return;
    }

    const VectorType one = VectorType::Ones();
    const int scale2 = _scale * _scale;

    ScreenSpaceFit fit;
    fit.init();
    fit.setNeighborFilter({getVector(x, y, _imgw, _imgh, _positions) * 2.f - one, float(_scale), _maxDepthDiff});

    // collect neighborhood; "<=" keeps the loops finite even for a negative _scale
    for (int dy = -_scale; dy <= _scale; ++dy)
    {
        const int ny = y + dy;
        if (ny < 0 || ny >= _imgh)
        {
            continue;
        }
        for (int dx = -_scale; dx <= _scale; ++dx)
        {
            const int nx = x + dx;
            if (nx < 0 || nx >= _imgw)
            {
                continue;
            }
            // Keep only the circular screen-space neighborhood
            if (dy * dy + dx * dx >= scale2)
            {
                continue;
            }
            ScreenSpacePoint::VectorType n = getVector(nx, ny, _imgw, _imgh, _normals);
            // add nei only when the normal is properly defined
            if (n.squaredNorm() == 0.f)
            {
                continue;
            }
            // RGB to XYZ remapping
            n = 2.f * n - one;
            n.normalize();
            ScreenSpacePoint::ScreenVectorType xyCoord;
            xyCoord[0] = ScreenSpacePoint::Scalar(dx);
            xyCoord[1] = ScreenSpacePoint::Scalar(dy);
            const ScreenSpacePoint::VectorType p = getVector(nx, ny, _imgw, _imgh, _positions) * 2.f - one;
            // GLS computation
            fit.addNeighbor(ScreenSpacePoint(p, n, xyCoord));
        }
    }

    // closed form minimization
    fit.finalize();
    _result[idx] = fit.kappa();
}

Memory access

The input buffers are interleaved: the three components of a pixel (in object space) are stored contiguously, and pixels are laid out row by row according to their screen-space coordinates:

/*!
 * \brief Flat index of one component of pixel (\p _x, \p _y) in an interleaved buffer.
 *
 * Pixels are stored row by row, each of them holding \p _nbComponent
 * consecutive values. \p _height does not take part in the computation.
 *
 * \param _x           Pixel column
 * \param _y           Pixel row
 * \param _width       Number of pixels per row
 * \param _height      Number of rows (unused)
 * \param _component   Index of the requested component within the pixel
 * \param _nbComponent Number of components stored per pixel
 * \return Offset of the requested value from the start of the buffer
 */
__device__ int getId(const int _x, const int _y, const int _width, const int _height, const int _component,
                     const int _nbComponent)
{
    const int pixel = _y * _width + _x;
    return pixel * _nbComponent + _component;
}
/*!
 * \brief Read the 3 components stored for pixel (\p _x, \p _y).
 *
 * \param _x      Pixel column
 * \param _y      Pixel row
 * \param _width  Number of pixels per row
 * \param _height Number of rows (only forwarded to getId)
 * \param _buffer Buffer holding 3 interleaved floats per pixel
 * \return The three values, in storage order
 */
__device__ VectorType getVector(const int _x, const int _y, const int _width, const int _height, const float* _buffer)
{
    const Scalar c0 = Scalar(_buffer[getId(_x, _y, _width, _height, 0, 3)]);
    const Scalar c1 = Scalar(_buffer[getId(_x, _y, _width, _height, 1, 3)]);
    const Scalar c2 = Scalar(_buffer[getId(_x, _y, _width, _height, 2, 3)]);
    return VectorType(c0, c1, c2);
}

The whole code

We use libpng to load the input images and to save the result.

/*
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
/*!
* \file examples/cuda/ssgls/ponca_ssgls.cu
* \brief Screen space GLS using c++/CUDA
*/
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <vector>
#include <chrono>
#include <png.h>
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#include <Ponca/src/Fitting/basket.h>
#include <Ponca/src/Fitting/gls.h>
#include <Ponca/src/Fitting/orientedSphereFit.h>
#include <Ponca/src/Fitting/weightFunc.h>
#include <Ponca/src/Fitting/weightKernel.h>
/**************************************************************************************************/
/* IO (source: http://zarb.org/~gc/html/libpng.html ) */
/**************************************************************************************************/
/*!
 * \brief Minimal PNG image holder built on libpng.
 *
 * Pixel rows are allocated with \c malloc by load() and released with
 * \c free by the destructor. The object owns raw buffers, so copying is
 * disabled to rule out double frees.
 */
class PNGImage
{
public:
    PNGImage() = default;
    PNGImage(const PNGImage&) = delete;
    PNGImage& operator=(const PNGImage&) = delete;

    //! Read \p file_name; returns false (after printing a message) on failure
    inline bool load(const char* file_name);
    //! True once pixel rows are available
    [[nodiscard]] inline bool loaded() const { return !row_pointers.empty(); }
    //! Write the current rows to \p file_name; returns false on failure
    inline bool save(const char* file_name);
    [[nodiscard]] inline png_uint_32 width() const { return m_width; }
    [[nodiscard]] inline png_uint_32 height() const { return m_height; }
    //! One pointer per image row
    [[nodiscard]] inline const std::vector<png_bytep>& buffer() const { return row_pointers; }
    inline std::vector<png_bytep>& buffer() { return row_pointers; }
    //! Color type reported by libpng for the current handles
    [[nodiscard]] inline png_byte colorType() const { return png_get_color_type(png_ptr, info_ptr); }

    ~PNGImage()
    {
        // Rows come from malloc(), so they must go back through free(), not delete
        for (png_bytep row : row_pointers)
            free(row);
        row_pointers.clear();
        // NOTE(review): png_ptr/info_ptr are not destroyed here because this class does not
        // record whether they currently designate a read or a write structure.
    }

private:
    // Everything starts zeroed/null so the accessors are well defined before load()
    png_uint_32 m_width = 0, m_height = 0;
    png_byte color_type = 0;
    png_byte bit_depth = 0;
    png_structp png_ptr = nullptr;
    png_infop info_ptr = nullptr;
    int number_of_passes = 0;
    std::vector<png_bytep> row_pointers;
    using vecSizeT = typename std::vector<png_bytep>::size_type;
};
/*!
 * \brief Read a PNG file: header fields first, then every pixel row.
 *
 * Rows are allocated with \c malloc. On failure a message is printed, the
 * file is closed, any row allocated by this call is released and \c false is
 * returned. Rows kept from a previous successful call are released before the
 * new ones are allocated.
 *
 * \param file_name Path of the file to read
 * \return true when the whole image has been read
 */
bool PNGImage::load(const char* file_name)
{
    unsigned char header[8]; // 8 is the maximum size that can be checked

    /* open file and test for it being a png */
    FILE* fp = fopen(file_name, "rb");
    if (!fp)
    {
        std::cerr << "[read_png_file] File " << file_name << " could not be opened for reading" << std::endl;
        return false;
    }
    // A short read cannot contain a PNG signature: treat it like a bad signature
    if (fread(header, 1, 8, fp) != 8 || png_sig_cmp(header, 0, 8))
    {
        std::cerr << "[read_png_file] File " << file_name << " is not recognized as a PNG file" << std::endl;
        fclose(fp);
        return false;
    }

    /* initialize stuff */
    png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (!png_ptr)
    {
        std::cerr << "[read_png_file] png_create_read_struct failed" << std::endl;
        fclose(fp);
        return false;
    }
    info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr)
    {
        std::cerr << "[read_png_file] png_create_info_struct failed" << std::endl;
        png_destroy_read_struct(&png_ptr, nullptr, nullptr);
        fclose(fp);
        return false;
    }

    // libpng reports errors by jumping back here
    if (setjmp(png_jmpbuf(png_ptr)))
    {
        std::cerr << "[read_png_file] Error during init_iod" << std::endl;
        png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
        fclose(fp);
        return false;
    }
    png_init_io(png_ptr, fp);
    png_set_sig_bytes(png_ptr, 8);
    png_read_info(png_ptr, info_ptr);
    m_width = png_get_image_width(png_ptr, info_ptr);
    m_height = png_get_image_height(png_ptr, info_ptr);
    color_type = png_get_color_type(png_ptr, info_ptr);
    bit_depth = png_get_bit_depth(png_ptr, info_ptr);
    number_of_passes = png_set_interlace_handling(png_ptr);
    png_read_update_info(png_ptr, info_ptr);

    /* read file */
    if (setjmp(png_jmpbuf(png_ptr)))
    {
        std::cerr << "[read_png_file] Error during read_image" << std::endl;
        // Do not leave a partially filled image behind
        for (png_bytep row : row_pointers)
            free(row);
        row_pointers.clear();
        png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
        fclose(fp);
        return false;
    }

    // Release rows left by a previous load before allocating the new ones
    for (png_bytep row : row_pointers)
        free(row);
    row_pointers.assign(m_height, nullptr);
    for (vecSizeT y = 0; y < vecSizeT(m_height); y++)
    {
        row_pointers[y] = (png_byte*)(malloc(png_get_rowbytes(png_ptr, info_ptr)));
        if (!row_pointers[y])
        {
            std::cerr << "[read_png_file] Out of memory while allocating image rows" << std::endl;
            for (png_bytep row : row_pointers)
                free(row); // free(nullptr) is a no-op for the rows not allocated yet
            row_pointers.clear();
            png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
            fclose(fp);
            return false;
        }
    }
    png_read_image(png_ptr, row_pointers.data());
    fclose(fp);
    return true;
}
/*!
 * \brief Write the current rows to a PNG file, using the stored size, bit depth and color type.
 *
 * The libpng write structures are local to this call and destroyed before
 * returning, so the handles stored by load() are left untouched. The file is
 * closed on every exit path.
 *
 * \param file_name Path of the file to create
 * \return true when the file has been written completely
 */
bool PNGImage::save(const char* file_name)
{
    if (!loaded())
    {
        std::cerr << "[write_png_file] No image data to write" << std::endl;
        return false;
    }

    /* create file */
    FILE* fp = fopen(file_name, "wb");
    if (!fp)
    {
        std::cerr << "[write_png_file] File " << file_name << " could not be opened for writing" << std::endl;
        return false;
    }

    /* initialize stuff */
    png_structp write_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (!write_ptr)
    {
        std::cerr << "[write_png_file] png_create_write_struct failed" << std::endl;
        fclose(fp);
        return false;
    }
    png_infop write_info = png_create_info_struct(write_ptr);
    if (!write_info)
    {
        std::cerr << "[write_png_file] png_create_info_struct failed" << std::endl;
        png_destroy_write_struct(&write_ptr, nullptr);
        fclose(fp);
        return false;
    }

    // libpng reports errors by jumping back to the most recent setjmp
    if (setjmp(png_jmpbuf(write_ptr)))
    {
        std::cerr << "[write_png_file] Error during init_io" << std::endl;
        png_destroy_write_struct(&write_ptr, &write_info);
        fclose(fp);
        return false;
    }
    png_init_io(write_ptr, fp);

    /* write header */
    if (setjmp(png_jmpbuf(write_ptr)))
    {
        std::cerr << "[write_png_file] Error during writing header" << std::endl;
        png_destroy_write_struct(&write_ptr, &write_info);
        fclose(fp);
        return false;
    }
    png_set_IHDR(write_ptr, write_info, m_width, m_height, bit_depth, color_type, PNG_INTERLACE_NONE,
                 PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
    png_write_info(write_ptr, write_info);

    /* write bytes */
    if (setjmp(png_jmpbuf(write_ptr)))
    {
        std::cerr << "[write_png_file] Error during writing bytes" << std::endl;
        png_destroy_write_struct(&write_ptr, &write_info);
        fclose(fp);
        return false;
    }
    png_write_image(write_ptr, row_pointers.data());

    /* end write */
    if (setjmp(png_jmpbuf(write_ptr)))
    {
        std::cerr << "[write_png_file] Error during end of write" << std::endl;
        png_destroy_write_struct(&write_ptr, &write_info);
        fclose(fp);
        return false;
    }
    png_write_end(write_ptr, nullptr);

    png_destroy_write_struct(&write_ptr, &write_info);
    fclose(fp);
    return true;
}
/**************************************************************************************************/
/* Ponca initialization */
/**************************************************************************************************/
//! [mypoint]
/*!
 * \brief Sample type handed to the fitting procedure.
 *
 * A sample bundles a 3D position, a 3D normal and a 2D screen-space
 * coordinate. Every attribute is reachable through a read-only and a
 * mutable accessor.
 */
class ScreenSpacePoint
{
public:
    enum
    {
        Dim = 3 //!< Number of components of positions and normals
    };

    using Scalar = float;
    using VectorType = Eigen::Matrix<Scalar, Dim, 1>;
    using ScreenVectorType = Eigen::Matrix<Scalar, 2, 1>;
    using MatrixType = Eigen::Matrix<Scalar, Dim, Dim>;

    /*!
     * \brief Build a sample; every attribute defaults to the zero vector.
     * \param _pos    3D position
     * \param _normal 3D normal
     * \param _spos   2D screen-space coordinate
     */
    PONCA_MULTIARCH inline ScreenSpacePoint(const VectorType& _pos = VectorType::Zero(),
                                            const VectorType& _normal = VectorType::Zero(),
                                            const ScreenVectorType& _spos = ScreenVectorType::Zero())
        : m_pos(_pos), m_normal(_normal), m_spos(_spos)
    {
    }

    //! 3D position (read-only, then mutable)
    PONCA_MULTIARCH inline const VectorType& pos() const { return m_pos; }
    PONCA_MULTIARCH inline VectorType& pos() { return m_pos; }

    //! 3D normal (read-only, then mutable)
    PONCA_MULTIARCH inline const VectorType& normal() const { return m_normal; }
    PONCA_MULTIARCH inline VectorType& normal() { return m_normal; }

    //! 2D screen-space coordinate (read-only, then mutable)
    PONCA_MULTIARCH inline const ScreenVectorType& spos() const { return m_spos; }
    PONCA_MULTIARCH inline ScreenVectorType& spos() { return m_spos; }

private:
    VectorType m_pos;
    VectorType m_normal;
    ScreenVectorType m_spos;
};
//! [mypoint]
using Scalar = ScreenSpacePoint::Scalar;
using VectorType = ScreenSpacePoint::VectorType;
using ScreenVectorType = ScreenSpacePoint::ScreenVectorType;
//! [w_def]
/*!
 * \brief Weighting function driven by the screen-space coordinates of the samples.
 *
 * The weight kernel is evaluated on the norm of the sample's screen-space
 * coordinate divided by the support size \c m_t. A sample gets a null weight
 * when that norm exceeds \c m_t, or when the z-test is enabled (non-zero
 * \c m_dz) and the absolute z component of the relative position exceeds
 * \c m_dz.
 */
class ProjectedWeightFunc : public Ponca::DistWeightFunc<ScreenSpacePoint, Ponca::SmoothWeightKernel<Scalar>>
{
public:
    using Scalar = ScreenSpacePoint::Scalar;
    using VectorType = ScreenSpacePoint::VectorType;
    using Base = Ponca::DistWeightFunc<ScreenSpacePoint, Ponca::SmoothWeightKernel<Scalar>>;

    /*!
     * \param _evalPos Evaluation position, forwarded to the base class
     * \param _t       Support size, forwarded to the base class
     * \param _dz      Largest accepted absolute z difference; 0 disables the test
     */
    PONCA_MULTIARCH inline ProjectedWeightFunc(const VectorType& _evalPos = VectorType::Zero(),
                                               const Scalar& _t = Scalar(1.), const Scalar _dz = 0.f)
        : Base(_evalPos, _t), m_dz(_dz)
    {
    }

    /*!
     * \brief Weight of a sample.
     * \param _relativePos Sample position expressed relatively to the evaluation position
     * \param _attributes  Sample providing the screen-space coordinate
     * \return The weight paired with \p _relativePos, returned untouched
     */
    PONCA_MULTIARCH inline Base::WeightReturnType w(const VectorType& _relativePos,
                                                    const ScreenSpacePoint& _attributes) const
    {
        PONCA_MULTIARCH_STD_MATH(abs);
        const Scalar screenDist = _attributes.spos().norm();
        const Scalar zGap = abs(_relativePos[2]);

        const bool zTestEnabled = (m_dz != Scalar(0));
        const bool inSupport = !(screenDist > m_t) && !(zTestEnabled && zGap > m_dz);
        if (inSupport)
        {
            return {m_wk.f(screenDist / m_t), _relativePos};
        }
        return {Scalar(0.), _relativePos};
    }

private:
    Scalar m_dz; //!< z-difference threshold, 0 meaning "no test"
};
//! [w_def]
//! [fit_def]
// Fitting primitive used by the kernel: a Ponca::Basket assembled from the ScreenSpacePoint sample type,
// the ProjectedWeightFunc weighting function, and the OrientedSphereFit and GLSParam extensions.
using ScreenSpaceFit = Ponca::Basket<ScreenSpacePoint, ProjectedWeightFunc, Ponca::OrientedSphereFit, Ponca::GLSParam>;
//! [fit_def]
//! [data_acces]
/*!
 * \brief Flat index of one component of pixel (\p _x, \p _y) in an interleaved buffer.
 *
 * Pixels are stored row by row, each of them holding \p _nbComponent
 * consecutive values. \p _height does not take part in the computation.
 *
 * \param _x           Pixel column
 * \param _y           Pixel row
 * \param _width       Number of pixels per row
 * \param _height      Number of rows (unused)
 * \param _component   Index of the requested component within the pixel
 * \param _nbComponent Number of components stored per pixel
 * \return Offset of the requested value from the start of the buffer
 */
__device__ int getId(const int _x, const int _y, const int _width, const int _height, const int _component,
                     const int _nbComponent)
{
    const int pixel = _y * _width + _x;
    return pixel * _nbComponent + _component;
}
/*!
 * \brief Read the 3 components stored for pixel (\p _x, \p _y).
 *
 * \param _x      Pixel column
 * \param _y      Pixel row
 * \param _width  Number of pixels per row
 * \param _height Number of rows (only forwarded to getId)
 * \param _buffer Buffer holding 3 interleaved floats per pixel
 * \return The three values, in storage order
 */
__device__ VectorType getVector(const int _x, const int _y, const int _width, const int _height, const float* _buffer)
{
    const Scalar c0 = Scalar(_buffer[getId(_x, _y, _width, _height, 0, 3)]);
    const Scalar c1 = Scalar(_buffer[getId(_x, _y, _width, _height, 1, 3)]);
    const Scalar c2 = Scalar(_buffer[getId(_x, _y, _width, _height, 2, 3)]);
    return VectorType(c0, c1, c2);
}
//! [data_acces]
//! [kernel]
/*!
 * \brief Per-pixel screen-space curvature estimation.
 *
 * Expected launch: a 2D grid of 2D blocks in which thread (x, y) handles pixel
 * (x, y); threads falling outside the image return immediately. No shared
 * memory is used.
 *
 * For each pixel whose stored normal is non-zero, the samples located at less
 * than \p _scale pixels (and inside the image) are added to a ScreenSpaceFit,
 * and the resulting \c kappa() is written to \p _result. Pixels whose stored
 * normal is the zero vector get 0. Positions and normals are remapped with
 * \c 2*v-1 before being used.
 *
 * \param _imgw         Image width, in pixels
 * \param _imgh         Image height, in pixels
 * \param _scale        Radius of the neighborhood, in pixels
 * \param _maxDepthDiff Forwarded to the neighbor filter (presumably a ProjectedWeightFunc,
 *                      where 0 disables the z-test)
 * \param _positions    Position buffer, 3 interleaved floats per pixel (read through getVector)
 * \param _normals      Normal buffer, same layout as \p _positions
 * \param _result       Output buffer, one float per pixel
 */
__global__ void doGLS_kernel(int _imgw, int _imgh, int _scale, float _maxDepthDiff, const float* _positions,
                             const float* _normals, float* _result)
{
    const int x = int(blockIdx.x) * int(blockDim.x) + int(threadIdx.x);
    const int y = int(blockIdx.y) * int(blockDim.y) + int(threadIdx.y);
    if (x >= _imgw || y >= _imgh)
    {
        return;
    }
    const int idx = y * _imgw + x;

    // Nothing to fit where the normal is undefined
    if (getVector(x, y, _imgw, _imgh, _normals).squaredNorm() == 0.f)
    {
        _result[idx] = 0.f;
        return;
    }

    const VectorType one = VectorType::Ones();
    const int scale2 = _scale * _scale;

    ScreenSpaceFit fit;
    fit.init();
    fit.setNeighborFilter({getVector(x, y, _imgw, _imgh, _positions) * 2.f - one, float(_scale), _maxDepthDiff});

    // collect neighborhood; "<=" keeps the loops finite even for a negative _scale
    for (int dy = -_scale; dy <= _scale; ++dy)
    {
        const int ny = y + dy;
        if (ny < 0 || ny >= _imgh)
        {
            continue;
        }
        for (int dx = -_scale; dx <= _scale; ++dx)
        {
            const int nx = x + dx;
            if (nx < 0 || nx >= _imgw)
            {
                continue;
            }
            // Keep only the circular screen-space neighborhood
            if (dy * dy + dx * dx >= scale2)
            {
                continue;
            }
            ScreenSpacePoint::VectorType n = getVector(nx, ny, _imgw, _imgh, _normals);
            // add nei only when the normal is properly defined
            if (n.squaredNorm() == 0.f)
            {
                continue;
            }
            // RGB to XYZ remapping
            n = 2.f * n - one;
            n.normalize();
            ScreenSpacePoint::ScreenVectorType xyCoord;
            xyCoord[0] = ScreenSpacePoint::Scalar(dx);
            xyCoord[1] = ScreenSpacePoint::Scalar(dy);
            const ScreenSpacePoint::VectorType p = getVector(nx, ny, _imgw, _imgh, _positions) * 2.f - one;
            // GLS computation
            fit.addNeighbor(ScreenSpacePoint(p, n, xyCoord));
        }
    }

    // closed form minimization
    fit.finalize();
    _result[idx] = fit.kappa();
}
//! [kernel]
/**
 * \brief Plain RGB triplet, one double per channel
 */
struct Color
{
    double r; //!< red channel
    double g; //!< green channel
    double b; //!< blue channel
};
/**
 * \brief Map \p _value to a color of a "seismic" like color map.
 *
 * A value of exactly 0 (used by the kernel for unknown pixels) gives white.
 * Any other value is clamped to [\p _valueMin, \p _valueMax]; the lower half
 * of the interval ramps red and green from 0 to 1 with blue fixed at 1, the
 * upper half ramps blue and green from 1 down to 0 with red fixed at 1.
 *
 * \param _value    Value to convert
 * \param _valueMin Lower bound of the mapped interval
 * \param _valueMax Upper bound of the mapped interval
 */
__host__ Color getColor(float _value, float _valueMin, float _valueMax)
{
    // Unknown values in our kernel
    if (_value == 0.)
    {
        return Color{1.0, 1.0, 1.0};
    }

    // Threshold
    _value = (_value < _valueMin) ? _valueMin : _value;
    _value = (_value > _valueMax) ? _valueMax : _value;

    // Interval
    const double dv = _valueMax - _valueMin;
    // Position inside the interval, scaled to [0, 2]
    const double ramp = 2 * (_value - _valueMin) / dv;

    // Seismic color map like
    const bool lowerHalf = _value < (_valueMin + 0.5 * dv);
    if (lowerHalf)
    {
        return Color{ramp, ramp, 1.0};
    }
    return Color{1.0, 2 - ramp, 2 - ramp};
}
/**
 * \brief Convert the two input images into the float buffers consumed by the kernel.
 *
 * Both images must be RGB and share the same size. Each byte is mapped to
 * [0, 1]: the remapping to [-1, 1] is done by the kernel itself, which also
 * relies on an all-zero normal to detect pixels without data. Remapping here
 * as well would apply the transformation twice and defeat that test.
 *
 * NOTE(review): 8 bits per channel are assumed; PNGImage exposes no bit-depth
 * accessor to check it from here.
 *
 * \param positions      Image holding the world coordinates
 * \param normals        Image holding the normal vectors
 * \param positionsInfos Output, 3 floats per pixel, row by row
 * \param normalsInfos   Output, same layout as \p positionsInfos
 * \param width          Output, image width in pixels
 * \param height         Output, image height in pixels
 * \return false (after printing a message) when the images cannot be used
 */
__host__ bool initInputDatas(const PNGImage& positions, const PNGImage& normals, std::vector<float>& positionsInfos,
                             std::vector<float>& normalsInfos, unsigned int& width, unsigned int& height)
{
    if (positions.colorType() != PNG_COLOR_TYPE_RGB || normals.colorType() != PNG_COLOR_TYPE_RGB)
    {
        // png_byte is a character type: cast so that the numeric value is printed
        std::cerr << "[process_file] color_type of input files must be PNG_COLOR_TYPE_RGB (" << PNG_COLOR_TYPE_RGB
                  << ") (are " << int(positions.colorType()) << " and " << int(normals.colorType()) << ")"
                  << std::endl;
        return false;
    }
    // Rows of both images are read with the same extent below
    if (normals.width() != positions.width() || normals.height() != positions.height())
    {
        std::cerr << "[process_file] positions and normals images must have the same size" << std::endl;
        return false;
    }

    width = positions.width();
    height = positions.height();

    const std::size_t rowValues = std::size_t(width) * 3;
    positionsInfos.resize(rowValues * height);
    normalsInfos.resize(rowValues * height);

    const auto& pbuf = positions.buffer();
    const auto& nbuf = normals.buffer();
    const auto toUnitRange = [](const png_byte& in) { return in / 255.f; };

    for (std::size_t j = 0; j < height; ++j)
    {
        const png_bytep pcol = pbuf[j];
        const png_bytep ncol = nbuf[j];
        std::transform(pcol, pcol + rowValues, positionsInfos.data() + j * rowValues, toUnitRange);
        std::transform(ncol, ncol + rowValues, normalsInfos.data() + j * rowValues, toUnitRange);
    }
    return true;
}
/**
 * \brief Save _results into png image
 *
 * The positions image is loaded again to provide a buffer of the right size,
 * its pixels are overwritten with the colors returned by getColor() for the
 * range [-10, 10], and the image is written to \p _resultFilename.
 * NaN entries of \p _results are replaced by 0 in place.
 *
 * NOTE(review): 8 bits per channel are assumed when writing the pixels.
 *
 * \param _results           One value per pixel, row by row (modified: NaN -> 0)
 * \param _positionsFilename Image used as a template for the output
 * \param _resultFilename    Path of the image to write
 * \return false when the template cannot be loaded or used, or when the output cannot be written
 */
__host__ bool saveResult(float* _results, const char* _positionsFilename, const char* _resultFilename)
{
    PNGImage result;
    if (!result.load(_positionsFilename))
    {
        fprintf(stderr, "Cannot load positions\n");
        return false;
    }
    // Three bytes are written per pixel below
    if (result.colorType() != PNG_COLOR_TYPE_RGB)
    {
        fprintf(stderr, "Cannot save image: positions file is not RGB\n");
        return false;
    }

    const int width = int(result.width());
    const int height = int(result.height());
    auto pbuf = result.buffer().data();
    for (int j = 0; j < height; ++j)
    {
        float* pin = _results + j * width;
        png_bytep col = pbuf[j];
        for (int i = 0; i < width; ++i)
        {
            // check nan
            if (std::isnan(pin[i]))
            {
                pin[i] = 0.f;
            }
            const Color c = getColor(pin[i], -10., 10.);
            col[i * 3 + 0] = static_cast<png_byte>(c.r * 255.);
            col[i * 3 + 1] = static_cast<png_byte>(c.g * 255.);
            col[i * 3 + 2] = static_cast<png_byte>(c.b * 255.);
        }
    }

    if (!result.save(_resultFilename))
    {
        fprintf(stderr, "Cannot save image\n");
        return false; // a failed write must not be reported as a success
    }
    return true;
}
/**
 * \brief Round \p n up to the next multiple of \p blockSize.
 *
 * Sizes smaller than one block are rounded up too, so that
 * adjust(n, blockSize) / blockSize is at least 1 for any positive \p n and
 * can be used as a grid dimension. \p n is returned unchanged when either
 * argument is not strictly positive.
 *
 * \param n         Number of elements to cover
 * \param blockSize Number of elements per block
 */
__host__ int adjust(int n, int blockSize)
{
    if (n <= 0 || blockSize <= 0)
    {
        return n;
    }
    const int blocks = n / blockSize + (n % blockSize == 0 ? 0 : 1);
    return blocks * blockSize;
}
/*!
 * \brief Load the two sample images, run the curvature kernel on the GPU and save the result.
 *
 * The kernel is launched once as a warm-up, then 100 times to report an
 * average run time. Every CUDA call is checked; the program returns
 * EXIT_FAILURE as soon as a step fails, and 0 otherwise.
 */
int main()
{
    const char* positionsFilename = "./data/ssgls_sample_wc.png";
    const char* normalsFilename = "./data/ssgls_sample_normal.png";
    const char* resultFilename = "./ssgls_results.png";

    PNGImage positions, normals;
    if (!positions.load(positionsFilename) || !normals.load(normalsFilename))
    {
        return EXIT_FAILURE;
    }

    const float fScale = 10.f;
    const float fMaxDepthDiff = 0.00f;
    unsigned int width = 0;
    unsigned int height = 0;
    std::vector<float> positionsInfos, normalsInfos;
    if (!initInputDatas(positions, normals, positionsInfos, normalsInfos, width, height))
    {
        return EXIT_FAILURE;
    }
    if (width == 0 || height == 0)
    {
        std::cerr << "Empty input image" << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << "Image size : " << width << "*" << height << std::endl;

    /*********** Init Output ************/
    std::vector<float> results(size_t(width) * height, 0.f);

    /************* Init device mem *************/
    const size_t sizeResults = results.size() * sizeof(float);
    const size_t sizeImg = sizeResults * 3;

    // Prints the CUDA error message, if any, and tells whether the call succeeded
    const auto cudaOk = [](cudaError_t status, const char* what) {
        if (status == cudaSuccess)
        {
            return true;
        }
        std::cerr << "[cuda] " << what << " failed: " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    // Null by default so that the cudaFree() calls below are valid on every path
    float* positionsInfos_device = nullptr;
    float* normalsInfos_device = nullptr;
    float* results_device = nullptr;

    bool ok = cudaOk(cudaMalloc(&positionsInfos_device, sizeImg), "cudaMalloc(positions)") &&
              cudaOk(cudaMemcpy(positionsInfos_device, positionsInfos.data(), sizeImg, cudaMemcpyHostToDevice),
                     "cudaMemcpy(positions)") &&
              cudaOk(cudaMalloc(&normalsInfos_device, sizeImg), "cudaMalloc(normals)") &&
              cudaOk(cudaMemcpy(normalsInfos_device, normalsInfos.data(), sizeImg, cudaMemcpyHostToDevice),
                     "cudaMemcpy(normals)") &&
              cudaOk(cudaMalloc(&results_device, sizeResults), "cudaMalloc(results)") &&
              cudaOk(cudaMemcpy(results_device, results.data(), sizeResults, cudaMemcpyHostToDevice),
                     "cudaMemcpy(results)");

    if (ok)
    {
        /************* Memory conf *************/
        // Ceil-division: at least one block per dimension, even for images smaller than a block
        const dim3 block(32, 32, 1);
        const dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y, 1);

        /************* Kernel Call *************/
        std::cout << "ssCurvature running..." << std::endl;
        // dry run: first call is always slower. Wait for it so it is not part of the measure.
        doGLS_kernel<<<grid, block>>>(int(width), int(height), int(fScale), fMaxDepthDiff, positionsInfos_device,
                                      normalsInfos_device, results_device);
        ok = cudaOk(cudaGetLastError(), "doGLS_kernel launch") &&
             cudaOk(cudaDeviceSynchronize(), "doGLS_kernel dry run");

        if (ok)
        {
            constexpr int nbrun = 100;
            // steady_clock is monotonic, hence suited to measure durations
            const auto start = std::chrono::steady_clock::now();
            for (int i = 0; i != nbrun; ++i)
            {
                doGLS_kernel<<<grid, block>>>(int(width), int(height), int(fScale), fMaxDepthDiff,
                                              positionsInfos_device, normalsInfos_device, results_device);
            }
            // Launches are queued in order: one synchronization waits for all of them
            ok = cudaOk(cudaGetLastError(), "doGLS_kernel launch") &&
                 cudaOk(cudaDeviceSynchronize(), "doGLS_kernel execution");
            const auto end = std::chrono::steady_clock::now();
            if (ok)
            {
                const std::chrono::duration<double> diff = (end - start) / double(nbrun);
                std::cout << "ssCurvature completed in " << diff.count() << " s" << std::endl;
            }
        }
    }

    /************* Get Results *************/
    if (ok)
    {
        ok = cudaOk(cudaMemcpy(results.data(), results_device, sizeResults, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(results to host)");
    }
    std::cout << "Finalizing..." << std::endl;

    /********** Cuda Free ************/
    cudaFree(positionsInfos_device);
    cudaFree(normalsInfos_device);
    cudaFree(results_device);
    cudaDeviceReset();
    if (!ok)
    {
        return EXIT_FAILURE;
    }

    /********** Saving _result ************/
    if (!saveResult(results.data(), positionsFilename, resultFilename))
    {
        return EXIT_FAILURE;
    }
    std::cout << "Finished !" << std::endl;
    return 0;
}