thrust operations empty host array

I want to do some thrust operations but I am not sure how exactly.

Right now , I am receiving am array full of zeros ( the h_a array)

I have :

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime_api.h>

#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>


template <typename T>
struct square
{
    __host__ __device__
    T operator()( const T& x ) const
    {
        return x * x;
    }

};


int
main(
             int argc,
    const char * argv[] )
{
    const size_t NbOfPoints  = 256;

    int BlocksPerGridX    = 16;
    int BlocksPerGridY    = 16;

    int ThreadsPerBlockX  = 16;
    int ThreadsPerBlockY  = 16;

    // generate random data on the host
    thrust::host_vector<float> h_Kx ( NbOfPoints );
    thrust::generate( h_Kx.begin(), h_Kx.end(), rand );

    thrust::host_vector<float> h_Ky ( NbOfPoints );
    thrust::generate( h_Ky.begin(), h_Ky.end(), rand );

    // transfer to device
    thrust::device_vector<float> dev_Kx = h_Kx;
    thrust::device_vector<float> dev_Ky = h_Ky;

    // create arrays for holding the number of threads per block in each dimension
    int * X , * Y;
    cudaMalloc((void **) &X, ThreadsPerBlockX * BlocksPerGridX * sizeof(*X) );
    cudaMalloc((void **) &Y, ThreadsPerBlockY * BlocksPerGridY * sizeof(*Y) );

    // wrap raw pointer with a device_ptr
    thrust::device_ptr<int> dev_X ( X );
    thrust::device_ptr<int> dev_Y ( Y );

    // use device_ptr in Thrust algorithms
    thrust::fill( dev_X, dev_X + ( ThreadsPerBlockX * BlocksPerGridX ) , (int) 0 );
    thrust::fill( dev_Y, dev_Y + ( ThreadsPerBlockY * BlocksPerGridY ) , (int) 0 );

    // setup arguments
    square<float> square_op;

    // create various vectors
    thrust::device_vector<int> distX ( NbOfPoints );
    thrust::device_vector<int> distY ( NbOfPoints );
    thrust::device_vector<unsigned int> Tmp ( NbOfPoints );
    thrust::host_vector<unsigned int> h_a ( NbOfPoints );
    thrust::device_vector<unsigned int> distXSquared ( NbOfPoints );
    thrust::device_vector<unsigned int> distYSquared ( NbOfPoints );


    // compute distX = dev_Kx - dev_X and distY = dev_Ky - dev_Y
    thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
    thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

    //square distances
    thrust::transform( distX.begin(), distX.end(), distXSquared.begin(), square_op );
    thrust::transform( distY.begin(), distY.end(), distYSquared.begin(), square_op );

    // compute Tmp =  distX + distY
    thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );
    thrust::copy( Tmp.begin(), Tmp.end(), h_a.begin() );


    for ( int i = 0; i < 5; i ++ )
        printf("\n temp = %u",h_a[ i ] );


return 0;
}

UPDATE:

Apart the edits from Robert Crovella , you must edit to integers:

square<int> square_op;
thrust::transform( dev_Kx.begin(), dev_Kx.end(), dev_X , distX.begin() , thrust::minus<int>() );
thrust::transform( dev_Ky.begin(), dev_Ky.end(), dev_Y , distY.begin() , thrust::minus<int>() );

标签： cuda thrust

1条回答

趁早两清

2楼-- · 2019-09-20 12:39

You've got several instances of doing zero-length transforms:

thrust::transform( dev_Kx.begin(), dev_Kx.begin(), dev_X , distX.begin() , thrust::minus<float>() );
thrust::transform( dev_Ky.begin(), dev_Ky.begin(), dev_Y , distY.begin() , thrust::minus<float>() );

and:

thrust::transform( distXSquared.begin() ,distXSquared.begin() , distYSquared.begin() , Tmp.begin() , thrust::plus<unsigned int>() );

Since the first two parameters to each of the above transforms is the same, the work being done is zero. Presumably you want the corresponding .end() iterators in the second position rather than .begin()

When I make those changes, I got non-zero values printed out. They are quite large, but you appear to be squaring large values, so I'm not sure what your intent is.

0人赞添加讨论(0) 举报

thrust operations empty host array

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间