# openmp
## openmp
* API for shared-memory parallel programming * standard * www.openmp.org * portable, PRAM like (CREW) * easy, but hides communication costs * easy API * C, C++, Fortran * compiler directives * runtime library routines * environment variables * nested parallelism, dynamic threads, no IO
## hello openmp
c #include <stdio.h> int main(void) { #pragma omp parallel printf("Hello, world.\n"); return 0; }
## OpenMP example c #include <stdio.h> #define N 1000 extern void combine(double, double); extern double big_comp(int); int main() { int i; double answer, res; answer = 0.0; for (i=0; i<N; ++i) { res = big_comp(i); combine (answer, res); } printf("%f\n", answer); }
## OpenMP example c #include <stdio.h> #include <omp.h> #define N 1000 extern void combine(double, double); extern double big_comp(int); int main() { int i; double answer, res; answer = 0.0; for (i=0; i<N; ++i) { res = big_comp(i); combine (answer, res); } printf("%f\n", answer); }
## OpenMP example c #include <stdio.h> #include <omp.h> #define N 1000 extern void combine(double, double); extern double big_comp(int); int main() { int i; double answer, res[N]; answer = 0.0; #pragma omp parallel for for (i=0; i<N; ++i) { res[i] = big_comp(i); } for (i=0; i<N; ++i) { combine (answer, res[i]); } printf("%f\n", answer); }
## openmp directives
### #pragma omp directive [clause list] * create teams for threads * specify work sharing * declare shared/private variables * synchronization * exclusive execution (critical regions)
## simple parallel tasks ### #pragma omp parallel
c #include <stdio.h> #include <omp.h> int main() { int i=5; // shared variable #pragma omp parallel { int c; // local/private variable to each thread c = omp_get_thread_num(); printf("c = %d, i=%d\n"") } return 0; } bash $ g++ -fopenmp ex1.cpp -o ex1 $ icpc -openmp ex1.cpp -o ex1 $ OMP_NUM_THREADS=2 ./ex1
## setting number of threads

#### #pragma omp parallel num_threads(3)
#### environment variable - OMP_NUM_THREADS bash: export OMP_NUM_THREADS=8 csh: setenv OMP_NUM_THREADS=8

## parallel sections
c #pragma omp parallel sections num_threads(3) { #pragma omp section { task_1(); } #pragma omp section { task_2(); } #pragma omp section { task_3(); } }
## matvec
c #pragma omp parallel for default(none) \ private (i,j,sum) shared(m,n,a,b,c) for (i=0; i<m; ++i) { sum = 0.0; for (j=0; j<n; ++j) sum += b[i][j]*c[j]; a[i] = sum; }
## matvec
## single execution and nowait c #pragma omp parallel { #pragma omp single // Only a single thread can read the input. printf_s("read input\n"); // Multiple threads in the team compute the results. #pragma omp for nowait for (i = 0; i < size; i++) b[i] = a[i] * a[i]; #pragma omp for for (i = 0; i < size; i++) c[i] = a[i]/2; #pragma omp single // Only a single thread can write the output. printf_s("write output\n"); }
## reductions
#### #pragma omp parallel for reduction(+:sum) c n = 1000; result = 0.0; #pragma omp parallel for reduction(+:result) for (i=0; i<n; ++i) result += a[i]*b[i];
## critical c #pragma omp critical(dataupdate) { datastructure.reorganize(); } ... #pragma omp critical(dataupdate) { datastructure.reorganize_again(); }
## locks c omp_lock_t lck1, lck2; int id; omp_init_lock(&lck1); omp_init_lock(&lck2); #pragma omp parallel shared(lck1, lck2) private(id) { id = omp_get_thread_num(); omp_set_lock(&lck1); printf("thread %d has the lock \n", id); printf("thread %d ready to release the lock \n", id); omp_unset_lock(&lck1); while (! omp_test_lock(&lck2)) { // do something useful while waiting for the lock do_something_else(id); } go_for_it(id); // Thread has the lock omp_unset_lock(&lck2); } omp_destroy_lock(&lck1); omp_destroy_lock(&lck2);
# errors
## errors c #pragma omp parallel { work1(); work2(); } #pragma omp parallel work3(); work4();
## errors c void compute(int n) { int i; double h, x, sum; h = 1.0/n; sum = 0.0; #pragma omp for reduction (+:sum) shared(h) for (i=0; i<n; ++i) { x = h*(i-0.5); sum += 1/(1+x*x); } pi = h*sum; }
## errors c int i,j; #pragma omp parallel for for (i=0; i<n; ++i) for (j=0; j<m; ++j) { a[i][j] = compute(i, j); }
## errors c int a, b, i; #pragma omp parallel for private(i,a,b) for (i=0; i<n; ++i) { b++; a = b*i; } c = a+b;
## correct version c int a, b=0, i; #pragma omp parallel for private(i), firstprivate(b), \ lastprivate(a,b) for (i=0; i<n; ++i) { b++; a = b*i; } c = a+b;