I have three device vectors A, B and C as follows.
A = [1,1,3,3,3,4,4,5,5]
B = [1,3,5]
C = [2,8,6]
I want to replace every occurrence in A of an element of B with the corresponding element of C.
Eg:
- 1 is replaced by 2,
- 3 is replaced by 8,
- 5 is replaced by 6
so as to get the following result
Result = [2,2,8,8,8,4,4,6,6]
How do I achieve this with CUDA Thrust, or in any other way in CUDA C++? I found thrust::replace, but it replaces only a single value per call. Since I need to replace a huge amount of data, replacing one value at a time becomes a bottleneck.
This can be done efficiently by first building a map and then applying a custom functor which queries the map.
The example code does the following steps:
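Determine how large the map vector must be: it is indexed by the old values, so it needs at least (largest old value + 1) entries. If your data is already sorted, the largest value is simply the last element.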
Create a map vector that is large enough to be indexed by every old value, and copy each new value to the position given by the old value it replaces.
Apply the mapper functor to A. This functor reads new_value from the map vector. If this new_value is not 0, the value in A is replaced by the new value. This assumes that C will never contain 0. If it can contain 0, you must use another condition, e.g. initialize the map vector with -1 and check if new_value != -1.
#include <thrust/device_vector.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/copy.h>
#include <thrust/extrema.h>
#include <thrust/for_each.h>
#include <thrust/scatter.h>
#include <cstddef>
#include <iostream>
#include <iterator>
// Convenience wrapper: PRINTER(vec) prints the vector labelled with its own
// variable name, e.g. "vec:\t1\t2\t3\t".
#define PRINTER(name) print(#name, (name))

// Writes `name`, a colon and a tab to std::cout, followed by every element of
// `v` (each one followed by a tab), and finishes the line with a flush.
// Accepts any class template whose first template argument is the element
// type and which offers begin()/end().
template <template <typename...> class Container, typename Elem, typename ...Rest>
void print(const char* name, const Container<Elem, Rest...> & v)
{
    std::cout << name << ":\t";

    // thrust::copy streams the elements into the tab-separating iterator.
    std::ostream_iterator<Elem> tab_separated(std::cout, "\t");
    thrust::copy(v.begin(), v.end(), tab_separated);

    std::cout << std::endl;
}
// Functor that rewrites a value in place using a lookup table: the value is
// used as an index into `map`, and if the entry found there is non-zero it
// replaces the value. A zero entry means "no replacement", so 0 itself can
// never be used as a replacement value.
template <typename T>
struct mapper
{
    // map:  pointer to the first entry of the lookup table.
    // size: number of entries in the table. Values that are not a valid index
    //       (>= size, or negative) are left untouched instead of being looked
    //       up. The default means "no known limit": lookups are then
    //       effectively unchecked and the caller must guarantee that every
    //       value is a valid index.
    mapper(thrust::device_ptr<const T> map,
           std::size_t size = static_cast<std::size_t>(-1))
        : map(map), size(size)
    {
    }

    __host__ __device__
    void operator()(T& value) const
    {
        // Casting to an unsigned type turns negative values into very large
        // ones, so a single comparison rejects both kinds of bad index.
        if (static_cast<std::size_t>(value) >= size)
        {
            return;
        }

        const T new_value = map[value];
        if (new_value)
        {
            value = new_value;
        }
    }

    thrust::device_ptr<const T> map;
    std::size_t size;
};
// Demo: replaces every occurrence in A of a value listed in B with the value
// at the same position in C, using a lookup table on the GPU.
// Assumes A, B and C are non-empty, B and C have the same length, all values
// of A and B are non-negative, and C contains no 0 (0 marks "no replacement").
int main()
{
    const int A[] = {1,1,3,3,3,4,4,5,5};
    const int B[] = {1,3,5};
    const int C[] = {2,8,6};
    const int size_data = sizeof(A)/sizeof(A[0]);
    const int size_replace = sizeof(B)/sizeof(B[0]);

    // copy demo data to GPU
    thrust::device_vector<int> d_A (A, A+size_data);
    thrust::device_vector<int> d_B (B, B+size_replace);
    thrust::device_vector<int> d_C (C, C+size_replace);
    PRINTER(d_A);
    PRINTER(d_B);
    PRINTER(d_C);

    // The lookup table is indexed by the OLD values: scatter writes at the
    // positions given by B, and the mapper reads at the positions given by A.
    // It therefore needs (largest value in A or B) + 1 entries. The values in
    // C are only stored in the table and have no influence on its size.
    const int max_in_A = *thrust::max_element(d_A.begin(), d_A.end());
    const int max_in_B = *thrust::max_element(d_B.begin(), d_B.end());
    const int largest_index = (max_in_A > max_in_B) ? max_in_A : max_in_B;

    // Every entry starts as 0, which the mapper treats as "keep the value".
    thrust::device_vector<int> d_map(largest_index + 1, 0);

    // d_map[B[i]] = C[i]
    thrust::scatter(d_C.begin(), d_C.end(), d_B.begin(), d_map.begin());
    PRINTER(d_map);

    // Replace the values of A in place.
    thrust::for_each(d_A.begin(), d_A.end(), mapper<int>(d_map.data()));
    PRINTER(d_A);
    return 0;
}
output
d_A: 1 1 3 3 3 4 4 5 5
d_B: 1 3 5
d_C: 2 8 6
d_map: 0 2 0 8 0 6
d_A: 2 2 8 8 8 4 4 6 6