How to remove a nested loop with CUDA Thrust for a

I have two arrays array1 and array2 with n and m elements respectively. I want to find all pair distances between the elements. A brute force algorithm on the CPU is then:

for(int i =0; i<n; i++)
{
    for(int j =0; j<m; j++)
    {
         array_pair_distances[i][j] = array1[i]-array2[j];
    }       
}

Using CUDA Thrust I have simply turned this n*m problem into a n or m problem by using thrust::transform and a single for-loop. My question is how can I remove this last for-loop using Thrust?

EDIT: Added example of implementation with Thrust and one for-loop. The code checks if the pair-distance is greater than 0.1 and returns an int.

#include <stdio.h>
#include <iostream>
#include <cuda.h>

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/reduce.h>

struct PairDistanceCheck : public thrust::binary_function<float,float,int>
{
    __host__ __device__
        int operator()(const float& a, const float& b) const
        {
           if(thrust::get<0>(a) - thrust::get<0>(b) > 0.1)
           {
                return 1;
           } 
           else return 0;
        }
};

void function()
{
    int n = 1000;
    int m = 2000;

    // Initialization of host vectors 
    thrust::host_vector<float> h_1 (n);
    thrust::host_vector<float> h_2 (m);

    // Fill host_vectors with data
    *
    *
    *
    //

    // Copy host_vectors to device_vectors
    thrust::device_vector<float> d_1 = h_1;
    thrust::device_vector<float> d_2 = h_2;

    thrust::device_vector<float> d_temp (m);

    thrust::device_vector<int> d_sum (m);
    thrust::fill(d_sum.begin(), d_sum.end(), 0);

    thrust::device_vector<int> d_result (m);

    for (int i=0; i<n; i++)
    {
        // Filling device_vector d_temp with element i from d_2
        thrust::fill(d_temp.begin(), d_temp.end(), d_2[i]);

        thrust::transform((d_1.begin(), d_1.end(), d_temp.begin(), d_result.begin(), PairDistanceCheck());

        // Summing the vectors
        thrust::transform(d_sum.begin(), d_sum.end(), d_result.begin(), d_sum.begin(), thrust::plus<int>());

    }

    // Final sum
    int sum = thrust::reduce(d_sum.begin(), d_sum.end(), (int) 0, thrust::plus<int>());

    return 0;
}

标签： c++ loops cuda nested thrust

1条回答

在下西门庆

2楼-- · 2019-02-28 09:12

The very short answer is that you can't.

Thrust has no outer product algorithms, which is what would be required to perform the sort of calculation you would be interested in. You could do this by filling two matrices with rows/columns of the two input vectors and then directly subtract those. But that would be very inefficient (both memory and performance) compared to a proper outer product implementation.

0人赞添加讨论(0) 举报

How to remove a nested loop with CUDA Thrust for a

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间