I am working on integrating CUSP solvers into an existing FORTRAN code. As a first step, I am simply trying to pass in a pair of integer arrays and a float (real*4 in FORTRAN) from FORTRAN which will be used to construct and then print a COO format CUSP matrix.
So far, I have been able to follow this thread and got everything to compile and link: Unresolved references using IFORT with nvcc and CUSP
Unfortunately, the program is apparently sending garbage into the CUSP matrix and ends up crashing with the following error:
$./fort_cusp_test
testing 1 2 3
sparse matrix <1339222572, 1339222572> with 1339222568 entries
libc++abi.dylib: terminating with uncaught exception of type thrust::system::system_error: invalid argument
Program received signal SIGABRT: Process abort signal.
Backtrace for this error:
#0 0x10ff86ff6
#1 0x10ff86593
#2 0x7fff8593ff19
Abort trap: 6
The code for the cuda and fortran sources are as follows:
cusp_runner.cu
#include <stdio.h>
#include <cusp/coo_matrix.h>
#include <iostream>
#include <cusp/krylov/cg.h>
#include <cusp/print.h>
#if defined(__cplusplus)
extern "C" {
#endif
void test_coo_mat_print_(int * row_i, int * col_j, float * val_v, int n, int nnz ) {
//wrap raw input pointers with thrust::device_ptr
thrust::device_ptr<int> wrapped_device_I(row_i);
thrust::device_ptr<int> wrapped_device_J(col_j);
thrust::device_ptr<float> wrapped_device_V(val_v);
//use array1d_view to wrap individual arrays
typedef typename cusp::array1d_view< thrust::device_ptr<int> > DeviceIndexArrayView;
typedef typename cusp::array1d_view< thrust::device_ptr<float> > DeviceValueArrayView;
DeviceIndexArrayView row_indices(wrapped_device_I, wrapped_device_I + n);
DeviceIndexArrayView column_indices(wrapped_device_J, wrapped_device_J + nnz);
DeviceValueArrayView values(wrapped_device_V, wrapped_device_V + nnz);
//combine array1d_views into coo_matrix_view
typedef cusp::coo_matrix_view<DeviceIndexArrayView,DeviceIndexArrayView,DeviceValueArrayView> DeviceView;
//construct coo_matrix_view from array1d_views
DeviceView A(n,n,nnz,row_indices,column_indices,values);
cusp::print(A);
}
#if defined(__cplusplus)
}
#endif
fort_cusp_test.f90
program fort_cuda_test
implicit none
interface
subroutine test_coo_mat_print_(row_i,col_j,val_v,n,nnz) bind(C)
use, intrinsic :: ISO_C_BINDING, ONLY: C_INT,C_FLOAT
implicit none
integer(C_INT) :: n, nnz, row_i(:), col_j(:)
real(C_FLOAT) :: val_v(:)
end subroutine test_coo_mat_print_
end interface
integer*4 n
integer*4 nnz
integer*4, target :: rowI(9),colJ(9)
real*4, target :: valV(9)
integer*4, pointer :: row_i(:)
integer*4, pointer :: col_j(:)
real*4, pointer :: val_v(:)
n = 3
nnz = 9
rowI = (/ 1, 1, 1, 2, 2, 2, 3, 3, 3/)
colJ = (/ 1, 2, 3, 1, 2, 3, 1, 2, 3/)
valV = (/ 1, 2, 3, 4, 5, 6, 7, 8, 9/)
row_i => rowI
col_j => colJ
val_v => valV
write(*,*) "testing 1 2 3"
call test_coo_mat_print_(row_i,col_j,val_v,n,nnz)
end program fort_cuda_test
If you want to try it yourself, here is my (rather inelegant) makefile:
Test:
nvcc -Xcompiler="-fPIC" -shared cusp_runner.cu -o cusp_runner.so -I/Developer/NVIDIA/CUDA-6.5/include/cusp
gfortran -c fort_cusp_test.f90
gfortran fort_cusp_test.o cusp_runner.so -L/Developer/NVIDIA/CUDA-6.5/lib -lcudart -o fort_cusp_test
clean:
rm *.o *.so
The library paths will need to be changed as appropriate of course.
Can anyone point me in the right direction for how to properly pass in the needed arrays from the fortran code?
with the interface block removed and addition of a print statement at the start of the C function I can see that the arrays are being passed correctly, but n and nnz are causing problems. I get the following output:
$ ./fort_cusp_test
testing 1 2 3
n: 1509677596, nnz: 1509677592
i, row_i, col_j, val_v
0, 1, 1, 1.0000e+00
1, 1, 2, 2.0000e+00
2, 1, 3, 3.0000e+00
3, 2, 1, 4.0000e+00
4, 2, 2, 5.0000e+00
5, 2, 3, 6.0000e+00
6, 3, 1, 7.0000e+00
7, 3, 2, 8.0000e+00
8, 3, 3, 9.0000e+00
9, 0, 32727, 0.0000e+00
...
etc
...
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
#0 0x105ce7ff6
#1 0x105ce7593
#2 0x7fff8593ff19
#3 0x105c780a2
#4 0x105c42dbc
#5 0x105c42df4
Segmentation fault: 11
fort_cusp_test
interface
subroutine test_coo_mat_print_(row_i,col_j,val_v,n,nnz) bind(C)
use, intrinsic :: ISO_C_BINDING, ONLY: C_INT,C_FLOAT
implicit none
integer(C_INT),value :: n, nnz
integer(C_INT) :: row_i(:), col_j(:)
real(C_FLOAT) :: val_v(:)
end subroutine test_coo_mat_print_
end interface
integer*4 n
integer*4 nnz
integer*4, target :: rowI(9),colJ(9)
real*4, target :: valV(9)
integer*4, pointer :: row_i(:)
integer*4, pointer :: col_j(:)
real*4, pointer :: val_v(:)
n = 3
nnz = 9
rowI = (/ 1, 1, 1, 2, 2, 2, 3, 3, 3/)
colJ = (/ 1, 2, 3, 1, 2, 3, 1, 2, 3/)
valV = (/ 1, 2, 3, 4, 5, 6, 7, 8, 9/)
row_i => rowI
col_j => colJ
val_v => valV
write(*,*) "testing 1 2 3"
call test_coo_mat_print_(rowI,colJ,valV,n,nnz)
end program fort_cuda_test
cusp_runner.cu
#include <stdio.h>
#include <cusp/coo_matrix.h>
#include <iostream>
// #include <cusp/krylov/cg.h>
#include <cusp/print.h>
#if defined(__cplusplus)
extern "C" {
#endif
void test_coo_mat_print_(int * row_i, int * col_j, float * val_v, int n, int nnz ) {
printf("n: %d, nnz: %d\n",n,nnz);
printf("%6s, %6s, %6s, %12s \n","i","row_i","col_j","val_v");
for(int i=0;i<n;i++) {
printf("%6d, %6d, %6d, %12.4e\n",i,row_i[i],col_j[i],val_v[i]);
}
if ( false ) {
//wrap raw input pointers with thrust::device_ptr
thrust::device_ptr<int> wrapped_device_I(row_i);
thrust::device_ptr<int> wrapped_device_J(col_j);
thrust::device_ptr<float> wrapped_device_V(val_v);
//use array1d_view to wrap individual arrays
typedef typename cusp::array1d_view< thrust::device_ptr<int> > DeviceIndexArrayView;
typedef typename cusp::array1d_view< thrust::device_ptr<float> > DeviceValueArrayView;
DeviceIndexArrayView row_indices(wrapped_device_I, wrapped_device_I + n);
DeviceIndexArrayView column_indices(wrapped_device_J, wrapped_device_J + nnz);
DeviceValueArrayView values(wrapped_device_V, wrapped_device_V + nnz);
//combine array1d_views into coo_matrix_view
typedef cusp::coo_matrix_view<DeviceIndexArrayView,DeviceIndexArrayView,DeviceValueArrayView> DeviceView;
//construct coo_matrix_view from array1d_views
DeviceView A(n,n,nnz,row_indices,column_indices,values);
cusp::print(A); }
}
#if defined(__cplusplus)
}
#endif