BibTeX

@comment{Supelec816: chapter in an edited collection. Names are stored in
"Last, First" form, the month uses the predefined macro, and only acronyms
are brace-protected in titles so the bibliography style controls casing.}
@InCollection{Supelec816,
  author    = {Vialle, Stephane},
  title     = {Optimization Methodology for Parallel Programming of Homogeneous or Hybrid Clusters},
  booktitle = {Patterns for Parallel Programming on {GPUs}},
  editor    = {Magoules, F.},
  publisher = {Saxe-Coburg Publications},
  year      = {2013},
  month     = feb,
  abstract  = {This chapter proposes a study of the optimization process of parallel applications to be run on modern architectures (multi-core CPU nodes with GPUs). Different optimization schemes are proposed for overlapping computations with communications, and for computation kernels. Development methodologies are introduced to obtain different optimization degrees and specific criteria are defined to help developers find the most suited degree of optimization according to the considered application and parallel system. According to our experience in industrial collaborations, we analyze both performance and code complexity increase. This last point is an important issue, especially in the industry, as it directly impacts development and maintenance costs. Complete experiments are performed to evaluate the different variants of a benchmark application that consists in a dense matrix product. In those experiments, different runtime parameters and cluster configurations are tested. Then, the results are analyzed to evaluate the interest of the different optimization degrees as well as to validate the interest of the proposed optimization methodology.},
}