== Programming Details ==
OpenMP is designed for multi-processor/multi-core, shared-memory machines; the underlying architecture can be shared-memory UMA or NUMA.

It is an Application Program Interface (API) that may be used to explicitly direct multi-threaded, shared-memory parallelism. It comprises three primary components (a short example combining all three follows this list):

* Compiler Directives
* Runtime Library Routines
* Environment Variables
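A minimal sketch of how the three components fit together might look like this: the parallel directive spawns a team of threads, the runtime routines omp_get_thread_num() and omp_get_num_threads() query that team, and the OMP_NUM_THREADS environment variable (read by the OpenMP runtime, not by the program itself) sets the default team size.

<pre style="background-color: #C8C8C8; color: black; border: 2px solid black; font-family: monospace, sans-serif;">

#include <stdio.h>
#include <omp.h>      /* runtime library routines */

int main(void)
{
    /* Compiler directive: spawn a team of threads. */
    #pragma omp parallel
    {
        /* Runtime library routines: query the team. */
        int tid      = omp_get_thread_num();
        int nthreads = omp_get_num_threads();
        printf("Thread %d of %d\n", tid, nthreads);
    }

    /* Environment variable: the team size above is normally taken
       from OMP_NUM_THREADS, e.g. "export OMP_NUM_THREADS=4". */
    return 0;
}

</pre>

Built with gcc -fopenmp and run with OMP_NUM_THREADS=4, this prints four lines, one per thread, in no particular order.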
OpenMP compiler directives are used for various purposes, as shown in the sketch and the programming example below:

* Spawning a parallel region
* Dividing blocks of code among threads
* Distributing loop iterations between threads
* Serializing sections of code
* Synchronization of work among threads
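The programming example in the next section uses the parallel and for directives. The remaining purposes in the list (dividing blocks of code, serializing sections and synchronizing threads) could be sketched with the sections, single, critical and barrier directives along these lines:

<pre style="background-color: #C8C8C8; color: black; border: 2px solid black; font-family: monospace, sans-serif;">

#include <stdio.h>
#include <omp.h>

int main(void)
{
    #pragma omp parallel
    {
        /* Divide independent blocks of code among threads. */
        #pragma omp sections
        {
            #pragma omp section
            printf("Section A on thread %d\n", omp_get_thread_num());

            #pragma omp section
            printf("Section B on thread %d\n", omp_get_thread_num());
        }

        /* Serialize: only one thread executes this block. */
        #pragma omp single
        printf("Single block on thread %d\n", omp_get_thread_num());

        /* Synchronize: every thread waits here for the others. */
        #pragma omp barrier

        /* Serialize a section of code: one thread at a time. */
        #pragma omp critical
        printf("Critical block on thread %d\n", omp_get_thread_num());
    }
    return 0;
}

</pre>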
=== Programming example ===
<pre style="background-color: #C8C8C8; color: black; border: 2px solid black; font-family: monospace, sans-serif;">

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

/* compile with gcc -o test2 -fopenmp test2.c */

int main(int argc, char** argv)
{
    int i = 0;
    int size = 20;
    int* a = (int*) calloc(size, sizeof(int));
    int* b = (int*) calloc(size, sizeof(int));
    int* c;

    /* Fill the arrays serially. */
    for ( i = 0; i < size; i++ )
    {
        a[i] = i;
        b[i] = size-i;
        printf("[BEFORE] At %d: a=%d, b=%d\n", i, a[i], b[i]);
    }

    /* a and b are shared by all threads; each thread gets its own
       private copy of the scratch pointer c and the loop index i. */
    #pragma omp parallel shared(a,b) private(c,i)
    {
        c = (int*) calloc(3, sizeof(int));

        /* The loop iterations are divided among the threads of the team. */
        #pragma omp for
        for ( i = 0; i < size; i++ )
        {
            c[0] = 5*a[i];
            c[1] = 2*b[i];
            c[2] = -2*i;
            a[i] = c[0]+c[1]+c[2];

            c[0] = 4*a[i];
            c[1] = -1*b[i];
            c[2] = i;
            b[i] = c[0]+c[1]+c[2];
        }

        free(c);
    }

    /* Print the results serially. */
    for ( i = 0; i < size; i++ )
    {
        printf("[AFTER] At %d: a=%d, b=%d\n", i, a[i], b[i]);
    }
}

</pre>
==== Compiled ====
The program is compiled as follows; the Intel compiler can optionally be used instead:
<pre style="background-color: #C8C8C8; color: black; border: 2px solid black; font-family: monospace, sans-serif;">

module load gcc/4.9.3
gcc -o test2 -fopenmp test2.c

</pre>
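When the resulting binary is run, the OpenMP runtime picks the number of threads automatically (typically one per available core); it can be set explicitly with the OMP_NUM_THREADS environment variable, for example export OMP_NUM_THREADS=4 before running ./test2.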
== Usage Examples ==
=== Batch example ===
<pre style="background-color: #C8C8C8; color: black; border: 2px solid black; font-family: monospace, sans-serif;">

#!/bin/bash
#SBATCH -J openmpi-single-node
#SBATCH -N 1
#SBATCH --ntasks-per-node 28
#SBATCH -D /home/user/CODE_SAMPLES/OPENMP
#SBATCH -o %N.%j.%a.out
#SBATCH -e %N.%j.%a.err
#SBATCH -p compute
#SBATCH --exclusive

echo $SLURM_JOB_NODELIST

module purge
module load gcc/4.9.3

export I_MPI_DEBUG=5
export I_MPI_FABRICS=shm:tmi
export I_MPI_FALLBACK=no

/home/user/CODE_SAMPLES/OPENMP/demo

</pre>
<pre style="background-color: #C8C8C8; color: black; border: 2px solid black; font-family: monospace, sans-serif;">

[username@login01 ~]$ sbatch demo.job
Submitted batch job 289552

</pre>
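The job's standard output and error streams go to the files named by the -o and -e options; in these patterns %N expands to the node name, %j to the job ID and %a to the array task index.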