How to remove a nested loop with CUDA Thrust for a

2019-02-28 08:20发布

问题:

I have two arrays array1 and array2 with n and m elements respectively. I want to find all pair distances between the elements. A brute force algorithm on the CPU is then:

for(int i =0; i<n; i++)
{
    for(int j =0; j<m; j++)
    {
         array_pair_distances[i][j] = array1[i]-array2[j];
    }       
}

Using CUDA Thrust I have simply turned this n*m problem into a n or m problem by using thrust::transform and a single for-loop. My question is how can I remove this last for-loop using Thrust?

EDIT: Added example of implementation with Thrust and one for-loop. The code checks if the pair-distance is greater than 0.1 and returns an int.

#include <stdio.h>
#include <iostream>
#include <cuda.h>

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/fill.h>
#include <thrust/transform.h>
#include <thrust/reduce.h>

struct PairDistanceCheck : public thrust::binary_function<float,float,int>
{
    __host__ __device__
        int operator()(const float& a, const float& b) const
        {
           if(thrust::get<0>(a) - thrust::get<0>(b) > 0.1)
           {
                return 1;
           } 
           else return 0;
        }
};

void function()
{
    int n = 1000;
    int m = 2000;

    // Initialization of host vectors 
    thrust::host_vector<float> h_1 (n);
    thrust::host_vector<float> h_2 (m);

    // Fill host_vectors with data
    *
    *
    *
    //

    // Copy host_vectors to device_vectors
    thrust::device_vector<float> d_1 = h_1;
    thrust::device_vector<float> d_2 = h_2;

    thrust::device_vector<float> d_temp (m);

    thrust::device_vector<int> d_sum (m);
    thrust::fill(d_sum.begin(), d_sum.end(), 0);

    thrust::device_vector<int> d_result (m);

    for (int i=0; i<n; i++)
    {
        // Filling device_vector d_temp with element i from d_2
        thrust::fill(d_temp.begin(), d_temp.end(), d_2[i]);

        thrust::transform((d_1.begin(), d_1.end(), d_temp.begin(), d_result.begin(), PairDistanceCheck());

        // Summing the vectors
        thrust::transform(d_sum.begin(), d_sum.end(), d_result.begin(), d_sum.begin(), thrust::plus<int>());

    }

    // Final sum
    int sum = thrust::reduce(d_sum.begin(), d_sum.end(), (int) 0, thrust::plus<int>());

    return 0;
}

回答1:

The very short answer is that you can't.

Thrust has no outer product algorithms, which is what would be required to perform the sort of calculation you would be interested in. You could do this by filling two matrices with rows/columns of the two input vectors and then directly subtract those. But that would be very inefficient (both memory and performance) compared to a proper outer product implementation.