My following code got an error, when it runs, some of the graph weights are being overwritten, but that should not be happening with the Xa array(which keeps which ones have already been visited) and the __syncthreads() function... May someone help?
struct Node
{
int begin; // begining of the substring
int num; // size of the sub-string
};
__global__ void BFS (Node *Va, int *Ea, bool *Fa, bool *Xa, int *Ca, bool *parada)
{
int tid = threadIdx.x;
if (Fa[tid] == true && Xa[tid] == false)
{
Fa[tid] = false;
__syncthreads();
// Va begin is where it's edges' subarray begins, Va is it's
// number of elements
for (int i = Va[tid].begin; i < (Va[tid].begin + Va[tid].num); i++)
{
int nid = Ea[i];
if (Xa[nid] == false)
{
Ca[nid] = Ca[tid] + 1;
Fa[nid] = true;
*parada = true;
}
}
Xa[tid] = true;
}
}
// The BFS frontier corresponds to all the nodes being processed
// at the current level.
int main()
{
//descrição do grafo
struct Node node[4];
node[0].begin=0;
node[0].num=2;
node[1].begin=1;
node[1].num=0;
node[2].begin=2;
node[2].num=2;
node[3].begin=1;
node[3].num=0;
int edges[]={1,2,3,1};
bool frontier[4]={false};
bool visited[4]={false};
int custo[4]={0};
int source=0;
frontier[source]=true;
Node* Va;
cudaMalloc((void**)&Va,sizeof(Node)*4);
cudaMemcpy(Va,node,sizeof(Node)*4,cudaMemcpyHostToDevice);
int* Ea;
cudaMalloc((void**)&Ea,sizeof(Node)*4);
cudaMemcpy(Ea,edges,sizeof(Node)*4,cudaMemcpyHostToDevice);
bool* Fa;
cudaMalloc((void**)&Fa,sizeof(bool)*4);
cudaMemcpy(Fa,frontier,sizeof(bool)*4,cudaMemcpyHostToDevice);
bool* Xa;
cudaMalloc((void**)&Xa,sizeof(bool)*4);
cudaMemcpy(Xa,visited,sizeof(bool)*4,cudaMemcpyHostToDevice);
int* Ca;
cudaMalloc((void**)&Ca,sizeof(int)*4);
cudaMemcpy(Ca,custo,sizeof(int)*4,cudaMemcpyHostToDevice);
dim3 threads(4,1,1);
bool para;
bool* parada;
cudaMalloc((void**)¶da,sizeof(bool));
printf("\n");
int n=1;
do{
para=false;
cudaMemcpy(parada,¶,sizeof(bool),cudaMemcpyHostToDevice);
BFS <<<1,threads>>>(Va,Ea,Fa,Xa,Ca,parada);
CUT_CHECK_ERROR("kernel1 execution failed");
cudaMemcpy(¶,parada,sizeof(bool),cudaMemcpyDeviceToHost);
printf("Run number: %d >> ",n);
cudaMemcpy(custo,Ca,sizeof(int)*4,cudaMemcpyDeviceToHost);
for(int i=0;i<4;i++)
printf("%d ",custo[i]);
printf("\n");
n++;
}while(para);
printf("\nFinal:\n");
cudaMemcpy(custo,Ca,sizeof(int)*4,cudaMemcpyDeviceToHost);
for(int i=0;i<4;i++)
printf("%d ",custo[i]);
printf("\n");
}