CUDA Fortran

HPMC User Guide v 1.00
© 2022 Bassem W. Jamaleddine

3-2

CUDA Fortran

A Fortran program can call CUDA routines by linking against CUDA object files. The following example shows how to compile a Fortran program that calls CUDA code through an external kernel_wrapper routine.

-- Program Code 3.2.1 : [LISTING cudatest.cu] - [CUDA Program]

(raw text)

1.     #include <stdio.h>
2.     #include <stdlib.h>
3.     #include <string.h>
4.     #include <cuda.h>
5.     #include <cuda_runtime.h>
6.     
7.     
8.     // simple kernel function that adds two vectors: a[i] = a[i] + b[i]
9.     // the global index works for any number of blocks; the idx<N test
10.    // keeps the spare threads of the last block from touching memory
11.    __global__ void vect_add(float *a, float *b, int N)
12.    {
13.       int idx = blockIdx.x * blockDim.x + threadIdx.x;
14.       if (idx<N) a[idx] = a[idx] + b[idx];
15.    }
16.    
17.    // function called from main fortran program
18.    // (the Fortran CALL kernel_wrapper(a, b, N) resolves to this symbol;
19.    //  every argument arrives as a pointer, so N is read through Np)
20.    // on success a holds a + b on return; b is only read
21.    // a CUDA failure is reported on stderr and a must not be trusted
22.    extern "C" void kernel_wrapper_(float *a, float *b, int *Np)
23.    {
24.       float  *a_d = NULL, *b_d = NULL;  // GPU vector copies
25.       int N = *Np;                      // number of elements
26.    
27.       // a launch with zero threads is an error, so leave early
28.       if (N <= 0) return;
29.    
30.       size_t bytes = sizeof(float) * (size_t)N;
31.       int threads = 256;                     // threads per block
32.       int blocks  = (N - 1) / threads + 1;   // enough blocks to cover N
33.       cudaError_t err;
34.    
35.       // Allocate memory on GPU
36.       err = cudaMalloc( (void **)&a_d, bytes );
37.       if (err == cudaSuccess)
38.          err = cudaMalloc( (void **)&b_d, bytes );
39.    
40.       // copy vectors from CPU to GPU
41.       if (err == cudaSuccess)
42.          err = cudaMemcpy( a_d, a, bytes, cudaMemcpyHostToDevice );
43.       if (err == cudaSuccess)
44.          err = cudaMemcpy( b_d, b, bytes, cudaMemcpyHostToDevice );
45.    
46.       // call function on GPU
47.       if (err == cudaSuccess) {
48.          vect_add<<< blocks, threads >>>( a_d, b_d, N);
49.          err = cudaGetLastError();   // a launch does not return its error
50.       }
51.    
52.       // copy the result back from GPU to CPU
53.       // (b is not modified by the kernel, so only a is copied)
54.       if (err == cudaSuccess)
55.          err = cudaMemcpy( a, a_d, bytes, cudaMemcpyDeviceToHost );
56.    
57.       if (err != cudaSuccess)
58.          fprintf( stderr, "kernel_wrapper: CUDA error: %s\n",
59.                   cudaGetErrorString(err) );
60.    
61.       // free GPU memory (cudaFree on a NULL pointer does nothing)
62.       cudaFree(a_d);
63.       cudaFree(b_d);
64.       return;
65.    }

HPMC 2022

-- Program Code 3.2.2 : [LISTING fortest.f95] - [Fortran 95 Program]

(raw text)

1.     PROGRAM fortest
2.     
3.     ! Fills two small vectors on the host, passes them to the external
4.     ! routine kernel_wrapper and prints vector a before and after the call.
5.     
6.     IMPLICIT NONE
7.     
8.     integer*4, parameter :: N=8
9.     integer*4 :: k
10.    real*4, Dimension(N) :: a, b
11.    
12.    ! a = 1.0, 2.0, ..., N   and every element of b = 2.0
13.    a = (/ (k*1.0, k=1,N) /)
14.    b = 2.0
15.    
16.    print *, 'a = ', a
17.    
18.    ! external routine; the CUDA listing implements it as kernel_wrapper_
19.    CALL kernel_wrapper(a, b, N)
20.    
21.    print *, 'a + 2 = ', a
22.    
23.    END PROGRAM fortest

HPMC 2022

Makefile to compile the program fortest.f95

# NOTE: every recipe line below must begin with a TAB character, not spaces.
# The libraries are listed after fortest.f95 and cudatest.o so that the
# linker sees them once the CUDA symbols used by cudatest.o are known.
# lib64 is searched as well as lib because 64-bit CUDA installs use lib64.
# -lstdc++ is added because nvcc compiles the host part of cudatest.cu as C++.
CUDA_HOME ?= /usr/local/cuda

Test: fortest.f95 cudatest.o
	gfortran -L $(CUDA_HOME)/lib64 -L $(CUDA_HOME)/lib -I $(CUDA_HOME)/include fortest.f95 cudatest.o -lcudart -lcuda -lstdc++
cudatest.o: cudatest.cu
	nvcc -c -O3 cudatest.cu
.PHONY: clean
clean:
	rm -f a.out cudatest.o cudatest.linkinfo