Retain Duplicates with Set Intersection in CUDA

2020-04-23 05:49发布

问题:

I'm using CUDA and THRUST to perform paired set operations. I would like to retain duplicates, however. For example:

int keys[6] = {1, 1, 1, 3, 4, 5, 5};
int vals[6] = {1, 2, 3, 4, 5, 6, 7};
int comp[2] = {1, 5};

thrust::set_intersection_by_key(keys, keys + 6, comp, comp + 2, vals, rk, rv);

Desired result

rk[1, 1, 1, 5, 5]
rv[1, 2, 3, 6, 7]

Actual Result

rk[1, 5]
rv[5, 7]

I want all of the vals where the corresponding key is contained in comp.

Is there any way to achieve this using thrust, or do I have to write my own kernel or thrust function?

I'm using this function: set_intersection_by_key.

回答1:

Quoting from the thrust documentation:

The generalization is that if an element appears m times in [keys_first1, keys_last1) and n times in [keys_first2, keys_last2) (where m may be zero), then it appears min(m,n) times in the keys output range

Since comp does only contain each key once, n=1 and therefore min(m,1) = 1.

In order to get "all of the vals where the corresponding key is contained in comp", you can use the approach of my answer to a similar problem.

Similarly, the example code does the following steps:

  1. Get the largest element of d_comp. This assumes that d_comp is already sorted.

  2. Create vector d_map of size largest_element+1. Copy 1 to all positions of the entries of d_comp in d_map.

  3. Copy all entries from d_vals for which there is a 1 entry in d_map into d_result.

    #include <thrust/device_vector.h>
    #include <thrust/iterator/constant_iterator.h>
    #include <thrust/iterator/permutation_iterator.h>
    #include <thrust/functional.h>
    #include <thrust/copy.h>
    #include <thrust/scatter.h>
    #include <iostream>
    
    
    #define PRINTER(name) print(#name, (name))
    void print(const char* name, const thrust::device_vector<int>& v)
    {
        std::cout << name << ":\t";
        thrust::copy(v.begin(), v.end(), std::ostream_iterator<int>(std::cout, "\t"));
        std::cout << std::endl;
    }
    
    int main()
    {
        int keys[] = {1, 1, 1, 3, 4, 5, 5};
        int vals[] = {1, 2, 3, 4, 5, 6, 7};
        int comp[] = {1, 5};
    
        const int size_data = sizeof(keys)/sizeof(keys[0]);
        const int size_comp = sizeof(comp)/sizeof(comp[0]);
    
        // copy data to GPU
        thrust::device_vector<int> d_keys (keys, keys+size_data);
        thrust::device_vector<int> d_vals (vals, vals+size_data);
        thrust::device_vector<int> d_comp (comp, comp+size_comp);
    
        PRINTER(d_keys);
        PRINTER(d_vals);
        PRINTER(d_comp);
    
        int largest_element = d_comp.back();
    
        thrust::device_vector<int> d_map(largest_element+1);
    
        thrust::constant_iterator<int> one(1);
        thrust::scatter(one, one+size_comp, d_comp.begin(), d_map.begin());
        PRINTER(d_map);
    
        thrust::device_vector<int> d_result(size_data);
        using namespace thrust::placeholders;
        int final_size = thrust::copy_if(d_vals.begin(),
                                        d_vals.end(),
                                        thrust::make_permutation_iterator(d_map.begin(), d_keys.begin()),
                                        d_result.begin(),
                                        _1
                                        ) - d_result.begin();
        d_result.resize(final_size);
    
        PRINTER(d_result);
    
        return 0;
    }
    

output:

d_keys:     1   1   1   3   4   5   5   
d_vals:     1   2   3   4   5   6   7   
d_comp:     1   5   
d_map:      0   1   0   0   0   1   
d_result:   1   2   3   6   7   


标签: cuda thrust