HPMC User Guide v 1.00
© 2022 Bassem W. Jamaleddine
3-2
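A FORTRAN program can call CUDA routines by linking against CUDA object files. The following example shows how to compile a FORTRAN program that calls CUDA through an external kernel_wrapper routine. The routine is defined on the C/CUDA side as kernel_wrapper_ (with a trailing underscore) because gfortran appends an underscore to the names of external procedures.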
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda.h>
#include <cuda_runtime.h>

// Element-wise, in-place vector addition: a[i] = a[i] + b[i] for 0 <= i < N.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread handles the single
// element at its global index; the bounds check makes a partially filled
// last block safe. Uses no shared memory. b is only read.
__global__ void vect_add(float *a, float *b, int N)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < N) a[idx] = a[idx] + b[idx];
}

// Entry point called from the Fortran main program as
// `CALL kernel_wrapper(a, b, N)`; the trailing underscore in the C name
// matches the symbol the Fortran compiler emits for that call.
//
//   a   host array of *Np floats; overwritten with a + b on success
//   b   host array of *Np floats; read only
//   Np  pointer to the element count (Fortran passes arguments by reference)
//
// If *Np <= 0 the call is a no-op. If any CUDA call fails, a message is
// written to stderr, device memory is released, and `a` is left unchanged.
extern "C" void kernel_wrapper_(float *a, float *b, int *Np)
{
    const int threads = 256;              // threads per block

    if (Np == NULL) return;
    int N = *Np;
    if (N <= 0) return;                   // nothing to add

    size_t bytes = sizeof(float) * (size_t)N;
    int blocks = (N - 1) / threads + 1;   // ceil(N / threads) without int overflow

    float *a_d = NULL, *b_d = NULL;       // GPU copies of a and b
    cudaError_t err;

    // Allocate memory on GPU
    err = cudaMalloc((void **)&a_d, bytes);
    if (err == cudaSuccess) err = cudaMalloc((void **)&b_d, bytes);

    // Copy vectors from CPU to GPU
    if (err == cudaSuccess) err = cudaMemcpy(a_d, a, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) err = cudaMemcpy(b_d, b, bytes, cudaMemcpyHostToDevice);

    // Run the kernel; a launch does not return a status, so query it explicitly
    if (err == cudaSuccess) {
        vect_add<<<blocks, threads>>>(a_d, b_d, N);
        err = cudaGetLastError();
    }

    // Copy the result back. Only `a` is modified by the kernel, so `b` is not
    // copied. This blocking copy also waits for the kernel and reports any
    // error raised while it was executing.
    if (err == cudaSuccess) err = cudaMemcpy(a, a_d, bytes, cudaMemcpyDeviceToHost);

    if (err != cudaSuccess)
        fprintf(stderr, "kernel_wrapper_: CUDA error: %s\n", cudaGetErrorString(err));

    // Free GPU memory (cudaFree accepts a null pointer)
    cudaFree(a_d);
    cudaFree(b_d);
}
PROGRAM fortest

  ! Builds two small vectors on the host, hands them to the external
  ! kernel_wrapper routine, and prints the first vector before and
  ! after the call.

  IMPLICIT NONE

  integer*4, parameter :: N = 8
  integer*4 :: k
  real*4, dimension(N) :: a, b

  ! a = 1.0, 2.0, ..., N ; every element of b is 2.0
  a = (/ (real(k), k = 1, N) /)
  b = 2.0

  print *, 'a = ', a

  CALL kernel_wrapper(a, b, N)

  print *, 'a + 2 = ', a

END PROGRAM fortest
Makefile to compile the program fortest.f95
# Builds the FORTRAN + CUDA example.
#   Test        compile fortest.f95 and link it with cudatest.o (output: a.out)
#   cudatest.o  compile the CUDA wrapper with nvcc
#   clean       remove build products
# Recipe lines must begin with a TAB character.

# Root of the CUDA toolkit; override with `make CUDA_HOME=/path/to/cuda`.
CUDA_HOME = /usr/local/cuda

.PHONY: Test clean

# Libraries are listed after the objects that use them so that linkers
# resolving symbols in a single pass can find them. Both lib64 and lib are
# searched because the toolkit's library directory differs between installs.
# -lstdc++ supplies the C++ runtime that the nvcc-compiled object may need.
Test: fortest.f95 cudatest.o
	gfortran -I $(CUDA_HOME)/include fortest.f95 cudatest.o -L $(CUDA_HOME)/lib64 -L $(CUDA_HOME)/lib -lcudart -lcuda -lstdc++

cudatest.o: cudatest.cu
	nvcc -c -O3 cudatest.cu

# -f: do not fail when a file is already absent
clean:
	rm -f a.out cudatest.o cudatest.linkinfo