Question
Consider the following code:
program example
implicit none
integer, parameter :: n_coeffs = 1000
integer, parameter :: n_indices = 5
integer :: i
real(8), dimension(n_coeffs) :: coeff
integer, dimension(n_coeffs,n_indices) :: index
do i = 1, n_coeffs
coeff(i) = real(i*3,8)
index(i,:) = [2,4,8,16,32]*i
end do
end
For any 5 dimensional index I need to obtain the associated coefficient, without knowing or calculating
i
. For instance, given[2,4,8,16,32]
I need to obtain3.0
without computingi
.Is there a reasonable solution, perhaps using sparse matrices, that would work for
n_indices
in the order of 100 (thoughn_coeffs
still in the order of 1000)?
A Bad Solution
One solution would be to define a 5 dimensional array as in
real(8), dimension(2000,4000,8000,16000,32000) :: coeff2
do i = 1, ncoeffs
coeff2(index(i,1),index(i,2),index(i,3),index(i,4),index(i,5)) = coeff(i)
end do
then, to get the coefficient associated with [2,4,8,16,32]
, call
coeff2(2,4,8,16,32)
However, besides being very wasteful of memory, this solution would not allow n_indices
to be set to a number higher than 7 given the limit of 7 dimensions to an array.
OBS: This question is a spin-off of this one. I have tried to ask the question more precisely having failed in the first attempt, an effort that greatly benefited from the answer of @Rodrigo_Rodrigues.
Actual Code
In case it helps here is the code for the actual problem I am trying to solve. It is an adaptive sparse grid method for approximating a function. The main goal is to make the interpolation at the and as fast as possible:
MODULE MOD_PARAMETERS
IMPLICIT NONE
SAVE
INTEGER, PARAMETER :: d = 2 ! number of dimensions
INTEGER, PARAMETER :: L_0 = 4 ! after this adaptive grid kicks in, for L <= L_0 usual sparse grid
INTEGER, PARAMETER :: L_max = 9 ! maximum level
INTEGER, PARAMETER :: bound = 0 ! 0 -> for f = 0 at boundary
! 1 -> adding grid points at boundary
! 2 -> extrapolating close to boundary
INTEGER, PARAMETER :: max_error = 1
INTEGER, PARAMETER :: L2_error = 1
INTEGER, PARAMETER :: testing_sample = 1000000
REAL(8), PARAMETER :: eps = 0.01D0 ! epsilon for adaptive grid
END MODULE MOD_PARAMETERS
PROGRAM MAIN
USE MOD_PARAMETERS
IMPLICIT NONE
INTEGER, DIMENSION(d,d) :: ident
REAL(8), DIMENSION(d) :: xd
INTEGER, DIMENSION(2*d) :: temp
INTEGER, DIMENSION(:,:), ALLOCATABLE :: grid_index, temp_grid_index, grid_index_new, J_index
REAL(8), DIMENSION(:), ALLOCATABLE :: coeff, temp_coeff, J_coeff
REAL(8) :: temp_min, temp_max, V, T, B, F, x1
INTEGER :: k, k_1, k_2, h, i, j, L, n, dd, L1, L2, dsize, count, first, repeated, add, ind
INTEGER :: time1, time2, clock_rate, clock_max
REAL(8), DIMENSION(L_max,L_max,2**(L_max),2**(L_max)) :: coeff_grid
INTEGER, DIMENSION(d) :: level, LL, ii
REAL(8), DIMENSION(testing_sample,d) :: x_rand
REAL(8), DIMENSION(testing_sample) :: interp1, interp2
! ============================================================================
! EXECUTABLE
! ============================================================================
ident = 0
DO i = 1,d
ident(i,i) = 1
ENDDO
! Initial grid point
dsize = 1
ALLOCATE(grid_index(dsize,2*d),grid_index_new(dsize,2*d))
grid_index(1,:) = 1
grid_index_new = grid_index
ALLOCATE(coeff(dsize))
xd = (/ 0.5D0, 0.5D0 /)
CALL FF(xd,coeff(1))
CALL FF(xd,coeff_grid(1,1,1,1))
L = 1
n = SIZE(grid_index_new,1)
ALLOCATE(J_index(n*2*d,2*d))
ALLOCATE(J_coeff(n*2*d))
CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)
DO WHILE (L .LT. L_max)
L = L+1
n = SIZE(grid_index_new,1)
count = 0
first = 1
DEALLOCATE(J_index,J_coeff)
ALLOCATE(J_index(n*2*d,2*d))
ALLOCATE(J_coeff(n*2*d))
J_index = 0
J_coeff = 0.0D0
DO k = 1,n
DO i = 1,d
DO j = 1,2
IF ((bound .EQ. 0) .OR. (bound .EQ. 2)) THEN
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
ELSEIF (bound .EQ. 1) THEN
IF (grid_index_new(k,i) .EQ. 1) THEN
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(-(-1)**j)/)
ELSE
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
ENDIF
ENDIF
CALL XX(d,temp(1:d),temp(d+1:2*d),xd)
temp_min = MINVAL(xd)
temp_max = MAXVAL(xd)
IF ((temp_min .GE. 0.0D0) .AND. (temp_max .LE. 1.0D0)) THEN
IF (first .EQ. 1) THEN
first = 0
count = count+1
J_index(count,:) = temp
V = 0.0D0
DO k_1 = 1,SIZE(grid_index,1)
T = 1.0D0
DO k_2 = 1,d
CALL XX(1,temp(k_2),temp(d+k_2),x1)
CALL BASE(x1,grid_index(k_1,k_2),grid_index(k_1,k_2+d),B)
T = T*B
ENDDO
V = V+coeff(k_1)*T
ENDDO
CALL FF(xd,F)
J_coeff(count) = F-V
ELSE
repeated = 0
DO h = 1,count
IF (SUM(ABS(J_index(h,:)-temp)) .EQ. 0) THEN
repeated = 1
ENDIF
ENDDO
IF (repeated .EQ. 0) THEN
count = count+1
J_index(count,:) = temp
V = 0.0D0
DO k_1 = 1,SIZE(grid_index,1)
T = 1.0D0
DO k_2 = 1,d
CALL XX(1,temp(k_2),temp(d+k_2),x1)
CALL BASE(x1,grid_index(k_1,k_2),grid_index(k_1,k_2+d),B)
T = T*B
ENDDO
V = V+coeff(k_1)*T
ENDDO
CALL FF(xd,F)
J_coeff(count) = F-V
ENDIF
ENDIF
ENDIF
ENDDO
ENDDO
ENDDO
ALLOCATE(temp_grid_index(dsize,2*d))
ALLOCATE(temp_coeff(dsize))
temp_grid_index = grid_index
temp_coeff = coeff
DEALLOCATE(grid_index,coeff)
ALLOCATE(grid_index(dsize+count,2*d))
ALLOCATE(coeff(dsize+count))
grid_index(1:dsize,:) = temp_grid_index
coeff(1:dsize) = temp_coeff
DEALLOCATE(temp_grid_index,temp_coeff)
grid_index(dsize+1:dsize+count,:) = J_index(1:count,:)
coeff(dsize+1:dsize+count) = J_coeff(1:count)
dsize = dsize + count
DO i = 1,count
coeff_grid(J_index(i,1),J_index(i,2),J_index(i,3),J_index(i,4)) = J_coeff(i)
ENDDO
IF (L .LE. L_0) THEN
DEALLOCATE(grid_index_new)
ALLOCATE(grid_index_new(count,2*d))
grid_index_new = J_index(1:count,:)
ELSE
add = 0
DO h = 1,count
IF (ABS(J_coeff(h)) .GT. eps) THEN
add = add + 1
J_index(add,:) = J_index(h,:)
ENDIF
ENDDO
DEALLOCATE(grid_index_new)
ALLOCATE(grid_index_new(add,2*d))
grid_index_new = J_index(1:add,:)
ENDIF
ENDDO
CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)
PRINT *, 'Elapsed real time1 = ', DBLE(time2-time1)/DBLE(clock_rate)
PRINT *, 'Grid Points = ', SIZE(grid_index,1)
! ============================================================================
! Compute interpolated values:
! ============================================================================
CALL RANDOM_NUMBER(x_rand)
CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)
DO i = 1,testing_sample
V = 0.0D0
DO L1=1,L_max
DO L2=1,L_max
IF (L1+L2 .LE. L_max+1) THEN
level = (/L1,L2/)
T = 1.0D0
DO dd = 1,d
T = T*(1.0D0-ABS(x_rand(i,dd)/2.0D0**(-DBLE(level(dd)))-DBLE(2*FLOOR(x_rand(i,dd)*2.0D0**DBLE(level(dd)-1))+1)))
ENDDO
V = V + coeff_grid(L1,L2,2*FLOOR(x_rand(i,1)*2.0D0**DBLE(L1-1))+1,2*FLOOR(x_rand(i,2)*2.0D0**DBLE(L2-1))+1)*T
ENDIF
ENDDO
ENDDO
interp2(i) = V
ENDDO
CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)
PRINT *, 'Elapsed real time2 = ', DBLE(time2-time1)/DBLE(clock_rate)
END PROGRAM
And that's how you use it:
I must confess I was reluctant to post this code because, even though this answers your question, I honestly think this is
not
what you really want/need, but you dind't provide enough information for me to know what you reallydo
want/need.Edit (After actual code from OP):
If I decrypted your code correctly (and considering what you said in your previous question), you are declaring:
(where
L_max = 9
, sosize(coeff_grid) = 21233664
=~160MB) and then populating it with:(where
count
is of the order of 1000, i.e. 0.005% of its elements), so this way you can fetch the values by its 4 indices with the array notation.Please, don't do that. You don't need a sparse matrix in this case either. The new approach you proposed is much better: storing the indices in each row of an smaller array, and fetching on the array of coefficients by the corresponding location of those indices in its own array. This is way faster (avoiding the large allocation) and much more memory-efficient.
PS: Is it mandatory for you to stick to Fortran 90? Its a very old version of the standard and chances are that the compiler you're using implements a more recent version. You could improve the quality of your code a lot with the intrinsic
move_alloc
(for less array copies), the kind constants from the intrinsic moduleiso_fortran_env
(for portability), the[], >, <, <=,...
notation (for readability)...