% Chapter in an edited book (Saxe-Coburg Publications, 2013).
% NOTE(review): the original author field ended with a dangling name separator,
% so a co-author may have been dropped on export -- TODO confirm the author list.
@InCollection{Supelec816,
  author    = {Vialle, Stephane},
  title     = {Optimization Methodology for Parallel Programming of Homogeneous or Hybrid Clusters},
  booktitle = {Patterns for parallel programming on {GPUs}},
  editor    = {Magoules, F.},
  publisher = {Saxe-Coburg Publications},
  year      = {2013},
  month     = feb,
  abstract  = {This chapter proposes a study of the optimization process of
               parallel applications to be run on modern architectures
               (multi-core CPU nodes with GPUs). Different optimization
               schemes are proposed for overlapping computations with
               communications, and for computation kernels. Development
               methodologies are introduced to obtain different optimization
               degrees and specific criteria are defined to help developers
               find the most suited degree of optimization according to the
               considered application and parallel system. According to our
               experience in industrial collaborations, we analyze both
               performance and code complexity increase. This last point is
               an important issue, especially in the industry, as it directly
               impacts development and maintenance costs. Complete
               experiments are performed to evaluate the different variants
               of a benchmark application that consists in a dense matrix
               product. In those experiments, different runtime parameters
               and cluster configurations are tested. Then, the results are
               analyzed to evaluate the interest of the different
               optimization degrees as well as to validate the interest of
               the proposed optimization methodology.},
}