// Matrix-multiplication micro-benchmark: C = A * B on square float matrices,
// with the kernel wrapped in an Intel "#pragma offload" region and
// parallelised with OpenMP.

// Index of the coprocessor targeted by the offload pragma; override with
// -DMIC_DEV=<n> at compile time.
#ifndef MIC_DEV
#define MIC_DEV 0
#endif

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include <math.h>
#include <sys/time.h>

// ################################
// Current wall-clock time in seconds, as reported by gettimeofday()
// (seconds plus microseconds folded into one double).
double getTime(void) {
    struct timeval t;
    gettimeofday(&t, NULL);
    return ((double)t.tv_sec + ((double)t.tv_usec / 1000000.0));
}

// ################################
// openMP multiply matrices
//
// Computes C = A * B for size x size row-major float matrices.
//   size : number of rows and columns of every matrix.
//   A, B : inputs, copied in by the offload clauses.
//   C    : output, fully overwritten (zeroed, then accumulated into).
// The restrict qualifiers promise that the three matrices do not overlap.
// The transfer lengths are written as size*size and evaluated in int, so
// the caller must keep size*size within INT_MAX.
void doMult(int size, float (* restrict A)[size],
            float (* restrict B)[size], float (* restrict C)[size]
            ) {

#pragma offload target(mic:MIC_DEV) \
    in(A:length(size*size)) in( B:length(size*size)) \
    out(C:length(size*size))
    {
        // C is declared out-only above, so it has to be cleared inside the
        // region before anything is accumulated into it.
#pragma omp parallel for default(none) shared(C,size)
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                C[i][j] = 0.f;
            }
        }

        // matrix multiplication.
        // i-k-j order: the innermost loop walks one row of B and one row of
        // C contiguously. Each outer iteration writes only row i of C, so
        // the parallel iterations never write the same element.
#pragma omp parallel for default(none) shared(A,B,C,size)
        for (int i = 0; i < size; ++i) {
            for (int k = 0; k < size; ++k) {
                for (int j = 0; j < size; ++j) {
                    C[i][j] += A[i][k] * B[k][j];
                }
            }
        }
    }
}

// ################################
// Parses text as a strictly positive base-10 int.
// Returns 0 and stores the value in *out on success; returns -1 (leaving
// *out untouched) for empty input, trailing characters, values <= 0 or
// values that do not fit in an int.
static int parsePositiveInt(const char *text, int *out) {
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) {
        return -1;
    }
    if (value <= 0 || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

// ################################
// Usage: <prog> size nThreads iter
// Fills A and B with values from {-1, 0, 1}, runs one untimed warm-up
// multiplication, then times iter multiplications and prints the mean time
// per multiplication and the resulting GFlop/s (2*size^3 flops each).
// Returns 0 on success, -1 on bad arguments, EXIT_FAILURE when the
// matrices cannot be allocated.
int main(int argc, char *argv[]) {
    if (argc != 4) {
        fprintf(stderr,"Use: %s size nThreads iter\n",argv[0]);
        return -1;
    }

    int size = 0;
    int nThreads = 0;
    int iter = 0;
    // Non-positive values would give a VLA type with a non-positive bound
    // (size) or a division by zero when averaging (iter), so reject them.
    if (parsePositiveInt(argv[1], &size) != 0 ||
        parsePositiveInt(argv[2], &nThreads) != 0 ||
        parsePositiveInt(argv[3], &iter) != 0) {
        fprintf(stderr, "%s: size, nThreads and iter must be positive integers\n",
                argv[0]);
        return -1;
    }

    // doMult evaluates size*size in int for its transfer lengths; the byte
    // count must additionally fit in size_t.
    if (size > INT_MAX / size ||
        (size_t)size * (size_t)size > SIZE_MAX / sizeof(float)) {
        fprintf(stderr, "%s: size %d is too large\n", argv[0], size);
        return -1;
    }
    size_t bytes = (size_t)size * (size_t)size * sizeof(float);

    // NOTE(review): this sets the thread count on the host; whether it also
    // governs the threads used inside the offloaded region depends on the
    // offload runtime -- confirm.
    omp_set_num_threads(nThreads);

    float (*restrict A)[size] = malloc(bytes);
    float (*restrict B)[size] = malloc(bytes);
    float (*restrict C)[size] = malloc(bytes);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "%s: cannot allocate three %dx%d float matrices\n",
                argv[0], size, size);
        free(A); free(B); free(C);
        return EXIT_FAILURE;
    }

    // init matrices A and B
    // Done serially on purpose: rand() keeps hidden global state and is not
    // required to be thread-safe, so calling it from a parallel loop is a
    // data race. This O(size^2) fill is outside the timed section.
    for (int i = 0; i < size; ++i) {
        for (int j = 0; j < size; ++j) {
            A[i][j] = (float)((rand() % 3)-1);
            B[i][j] = (float)((rand() % 3)-1);
        }
    }

    // warmup
    doMult(size, A,B,C);

    double startTime = getTime();
    for (int n = 0; n < iter; n++) {
        doMult(size, A,B,C);
    }
    double endTime = getTime();

    // Mean wall-clock time of one multiplication.
    double lapsed = endTime-startTime;
    lapsed /= iter;

    printf("%s nThrds %d size %d elapsedtime %g GFlop/s %g\n",
           argv[0], nThreads, size, lapsed, 2e-9*size*size*size/lapsed);

    free(A); free(B); free(C);
    return 0;
}