-
O. Beaumont,
P. Ramet,
and J. Roman.
Asymptotically optimal algorithm for Laplace task graphs on heterogeneous platforms.
In Proceedings of Fifth International Conference on Parallel Processing and Applied Mathematics, Workshop HeteroPar,
volume 3019 of LNCS,
Czestochowa, Poland,
pages 880-887,
September 2003.
Springer Verlag.
Keyword(s): Overlap.
Abstract:
In this paper, we focus on the scheduling of Laplace task graph on a general platform where both communication links and processing units are heterogeneous. In this context, it is known that deriving optimal algorithm, in the sense of makespan minimization, is NP-Complete, and several inapproximation results have been proved. Nevertheless, we provide an asymtotically optimal algorithm in this general context. Moreover, we expect that this methodolgy can be extended to more general task graphs, especially for nested loops where the inner-most loop is parallel. |
@InProceedings{C:LaBRI::ppam03,
author = "Beaumont, O. and Ramet, P. and Roman, J.",
title = "Asymptotically optimal algorithm for Laplace task graphs on heterogeneous platforms",
booktitle = "Proceedings of Fifth International Conference on Parallel Processing and Applied Mathematics, Workshop HeteroPar",
OPTcrossref = {},
OPTkey = {},
OPTeditor = {},
volume = {3019},
OPTnumber = {},
series = {LNCS},
year = "2003",
OPTorganization = {},
publisher = {Springer Verlag},
address = {Czestochowa, Poland},
month = sep,
pages = {880--887},
OPTnote = {},
OPTannote = {},
URL = {http://www.labri.fr/~ramet/restricted/ppam03.ps},
KEYWORDS = "Overlap",
ABSTRACT = { In this paper, we focus on the scheduling of Laplace task graph on a general platform where both communication links and processing units are heterogeneous. In this context, it is known that deriving optimal algorithm, in the sense of makespan minimization, is NP-Complete, and several inapproximation results have been proved. Nevertheless, we provide an asymtotically optimal algorithm in this general context. Moreover, we expect that this methodolgy can be extended to more general task graphs, especially for nested loops where the inner-most loop is parallel. }
}
-
D. Goudin,
P. Hénon,
M. Mandallena,
K. Mer,
F. Pellegrini,
P. Ramet,
J. Roman,
and J.-J. Pesque.
Outils numériques parallèles pour la résolution de très grands problèmes d'électromagnétisme.
In Séminaire sur l'Algorithmique Numérique Appliquée aux Problèmes Industriels,
Calais, France,
May 2003.
Keyword(s): Sparse.
Abstract:
L'\'emergence des machines parall\`eles t\'eraflopiques permet maintenant de traiter des probl\`emes de diffraction de tr\`es grande taille li\'es \`a des calculs hautes fr\'equences par des m\'ethodes exactes. Une premi\`ere \'etape de ce travail a constist\'e \`a parall\'eliser des codes s\'equentiels existants qui utilisaient des m\'ethodes num\'eriques classiques. Ces codes \'etaient bas\'es sur un couplage fort par compl\'ement de Schur entre une formulation int\'egrale (\'el\'ements-finis surfaciques pour traiter la condition de radiation \`a l'infini) et une formulation \'el\'ements-finis volumiques (pour calculer les champs \`a l'int\'erieur de l'objet). L'adapation au parall\'elisme a consist\'e \`a parall\'eliser la factorisation du syst\`eme lin\'eaire creux (\`a l'aide de la biblioth\'eque EMILIO/PaStiX, la r\'esolution du syst\`eme int\'egral plein (sym\'etrique, complexe) et le couplage par compl\'ement de Schur (consistant en des descentes-remont\'ees sur le syst\`eme creux associ\'e). Les deux premi\`eres \'etapes cit\'ees ont donn\'e pleinement satisfaction, mais le compl\'ement de Schur, \`a cause du tr\'es grand nombre descentes-remont\'ees li\'ees aux inconnues surfaciques, limite l'efficacit\'e globale de cette approche \`a des probl\`emes de taille interm\'ediaire. Par exemple, pour le cas test du ``Sphere-Cone'' comportant $3.10^6$ d'inconnues volumiques et $23.10^3$ inconnues surfaciques, sur 32 n\oe uds ES45 (quadri-processeurs), l'assemblage volumique repr\'esente 6s, la factorisation du syst\`eme creux est obtenue en 376s et le compl\'ement de Schur en 21500s (en complexe double pr\'ecision). Pour d\'epasser ces limites, une \'evolution globale de l'algorithme a \'et\'e n\'ecessaire~: - un couplage it\'eratif entre les r\'esolutions surfacique et volumique a \'et\'e mis en place; - pour augmenter le nombre d'inconnues surfaciques une m\'ethode multip\^ole multi-niveaux parall\'ele a \'et\'e d\'evelopp\'ee; - enfin, pour atteindre des tailles de probl\`eme de plusieurs dizaines de millions d'inconnues volumiques, des \'evolutions ont \'et\'e apport\'ees dans la biblioth\`eque EMILIO/PaStiX. En particulier, une algorithmique sp\'ecifique a \'et\'e mise en place pour utiliser au mieux les clusters de n\oe uds SMP et pour pr\'eserver une scalabilit\'e m\'emoire raisonnable. Enfin, le sch\'ema actuel pr\'esente un couplage direct-it\'eratif \`a plusieurs niveaux. L'\'evolution envisag\'ee sera un couplage it\'eratif-it\'eratif, puis sera bas\'ee sur des m\'ethodes hybrides globales (interm\'ediaire entre le direct et l'it\'eratif)~: une version de PaStiX \'evolu\'ee permettra de calculer un pr\'econdition\-neur par bloc ILU(k) parall\`ele. |
@InProceedings{c:LaBRI::GHMMPRRP03,
author = "Goudin, D. and H\'enon, P. and Mandallena, M. and Mer, K. and Pellegrini, F. and Ramet, P. and Roman, J. and Pesque, J.-J.",
title = "Outils num\'eriques parall\`eles pour la r\'esolution de tr\`es grands probl\`emes d'\'electromagn\'etisme",
booktitle = "{S}\'eminaire sur l'{A}lgorithmique {N}um\'erique {A}ppliqu\'ee aux {P}robl\`emes {I}ndustriels",
OPTcrossref = {},
OPTkey = {},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
year = "2003",
OPTorganization = {},
OPTpublisher = {},
address = {Calais, France},
month = may,
OPTpages = {},
OPTnote = {},
OPTannote = {},
KEYWORDS = "Sparse",
ABSTRACT = { L'\'emergence des machines parall\`eles t\'eraflopiques permet maintenant de traiter des probl\`emes de diffraction de tr\`es grande taille li\'es \`a des calculs hautes fr\'equences par des m\'ethodes exactes. Une premi\`ere \'etape de ce travail a constist\'e \`a parall\'eliser des codes s\'equentiels existants qui utilisaient des m\'ethodes num\'eriques classiques. Ces codes \'etaient bas\'es sur un couplage fort par compl\'ement de Schur entre une formulation int\'egrale (\'el\'ements-finis surfaciques pour traiter la condition de radiation \`a l'infini) et une formulation \'el\'ements-finis volumiques (pour calculer les champs \`a l'int\'erieur de l'objet). L'adapation au parall\'elisme a consist\'e \`a parall\'eliser la factorisation du syst\`eme lin\'eaire creux (\`a l'aide de la biblioth\'eque EMILIO/PaStiX, la r\'esolution du syst\`eme int\'egral plein (sym\'etrique, complexe) et le couplage par compl\'ement de Schur (consistant en des descentes-remont\'ees sur le syst\`eme creux associ\'e). Les deux premi\`eres \'etapes cit\'ees ont donn\'e pleinement satisfaction, mais le compl\'ement de Schur, \`a cause du tr\'es grand nombre descentes-remont\'ees li\'ees aux inconnues surfaciques, limite l'efficacit\'e globale de cette approche \`a des probl\`emes de taille interm\'ediaire. Par exemple, pour le cas test du ``Sphere-Cone'' comportant $3.10^6$ d'inconnues volumiques et $23.10^3$ inconnues surfaciques, sur 32 n\oe uds ES45 (quadri-processeurs), l'assemblage volumique repr\'esente 6s, la factorisation du syst\`eme creux est obtenue en 376s et le compl\'ement de Schur en 21500s (en complexe double pr\'ecision). Pour d\'epasser ces limites, une \'evolution globale de l'algorithme a \'et\'e n\'ecessaire~: - un couplage it\'eratif entre les r\'esolutions surfacique et volumique a \'et\'e mis en place; - pour augmenter le nombre d'inconnues surfaciques une m\'ethode multip\^ole multi-niveaux parall\'ele a \'et\'e d\'evelopp\'ee; - enfin, pour atteindre des tailles de probl\`eme de plusieurs dizaines de millions d'inconnues volumiques, des \'evolutions ont \'et\'e apport\'ees dans la biblioth\`eque EMILIO/PaStiX. En particulier, une algorithmique sp\'ecifique a \'et\'e mise en place pour utiliser au mieux les clusters de n\oe uds SMP et pour pr\'eserver une scalabilit\'e m\'emoire raisonnable. Enfin, le sch\'ema actuel pr\'esente un couplage direct-it\'eratif \`a plusieurs niveaux. L'\'evolution envisag\'ee sera un couplage it\'eratif-it\'eratif, puis sera bas\'ee sur des m\'ethodes hybrides globales (interm\'ediaire entre le direct et l'it\'eratif)~: une version de PaStiX \'evolu\'ee permettra de calculer un pr\'econdition\-neur par bloc ILU(k) parall\`ele. }
}
-
P. Hénon,
F. Pellegrini,
P. Ramet,
and J. Roman.
An efficient hybrid MPI/Thread implementation on a network of SMP nodes for the parallel sparse direct solver PaStix: ordering / scheduling / memory managment / out-of-core issues, and application to preconditioning.
In Sparse Days,
Saint Girons, France,
June 2003.
Keyword(s): Sparse.
@InProceedings{C:LaBRI::sparsedays2003,
author = {H\'enon, P. and Pellegrini, F. and Ramet, P. and Roman, J.},
title = {An efficient hybrid MPI/Thread implementation on a network of SMP nodes for the parallel sparse direct solver PaStix: ordering / scheduling / memory managment / out-of-core issues, and application to preconditioning},
booktitle = {Sparse Days},
OPTcrossref = {},
OPTkey = {},
OPTpages = {},
year = {2003},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Saint Girons, France},
month = jun,
OPTorganization = {},
OPTpublisher = {},
OPTnote = {},
OPTannote = {},
KEYWORDS = "Sparse"
}
-
P. Hénon,
F. Pellegrini,
P. Ramet,
and J. Roman.
Towards High Performance Hybrid Direct-Iterative Solvers for Large Sparse Systems.
In International SIAM Conference On Preconditioning Techniques For Large Sparse Matrix Problems In Scientific And Industrial Applications,
Napa Valley, USA,
October 2003.
Keyword(s): Sparse.
@InProceedings{C:LaBRI::pre03,
author = {H\'enon, P. and Pellegrini, F. and Ramet, P. and Roman, J.},
title = {Towards High Performance Hybrid Direct-Iterative Solvers for Large Sparse Systems},
booktitle = {International SIAM Conference On Preconditioning Techniques For Large Sparse Matrix Problems In Scientific And Industrial Applications},
OPTcrossref = {},
OPTkey = {},
OPTpages = {},
year = {2003},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Napa Valley, USA},
month = oct,
OPTorganization = {},
OPTpublisher = {},
OPTnote = {},
OPTannote = {},
URL = {http://www.labri.fr/~ramet/restricted/pred2003.ps},
KEYWORDS = "Sparse"
}
-
P. Hénon,
P. Ramet,
and J. Roman.
Efficient algorithms for direct resolution of large sparse system on clusters of SMP nodes.
In SIAM Conference on Applied Linear Algebra,
Williamsburg, USA,
July 2003.
Keyword(s): Sparse.
Abstract:
In previous works, we have described an efficient static scheduling based on a mixed 1D/2D block distribution with local aggregation for a parallel supernodal version of sparse $LL^{T}$ factorization. In this paper, we present new algorithms suitable for overcoming architectures based on clusters of SMP nodes and also technics to keep a good memory scalability. This algorithms are implemented in the PaStiX library that achieved high performances (resolution of a $26.10^6$ unknown system on 192 ES45 quadriprocessors with 35 percent of peak performance). |
@InProceedings{C:LaBRI::siam2003,
author = {H\'enon, P. and Ramet, P. and Roman, J.},
title = {Efficient algorithms for direct resolution of large sparse system on clusters of {SMP} nodes},
booktitle = {{SIAM} Conference on Applied Linear Algebra},
OPTcrossref = {},
OPTkey = {},
OPTpages = {},
year = {2003},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Williamsburg, USA},
month = jul,
OPTorganization = {},
OPTpublisher = {},
OPTnote = {},
OPTannote = {},
KEYWORDS = "Sparse",
URL = {http://www.labri.fr/~ramet/restricted/RAMETP.PDF},
ABSTRACT = {In previous works, we have described an efficient static scheduling based on a mixed 1D/2D block distribution with local aggregation for a parallel supernodal version of sparse $LL^{T}$ factorization. In this paper, we present new algorithms suitable for overcoming architectures based on clusters of SMP nodes and also technics to keep a good memory scalability. This algorithms are implemented in the PaStiX library that achieved high performances (resolution of a $26.10^6$ unknown system on 192 ES45 quadriprocessors with 35 percent of peak performance). }
}
-
P. Hénon and Y. Saad.
A Parallel ILU factorization based on a Hierarchical Interface Decomposition algorithm.
In International SIAM Conference On Preconditioning Techniques For Large Sparse Matrix Problems In Scientific And Industrial Applications,
Napa Valley, USA,
October 2003.
Keyword(s): Sparse.
@InProceedings{C:LaBRI::pre03ph,
author = {H\'enon, P. and Saad, Y.},
title = {A Parallel ILU factorization based on a Hierarchical Interface Decomposition algorithm},
booktitle = {International SIAM Conference On Preconditioning Techniques For Large Sparse Matrix Problems In Scientific And Industrial Applications},
OPTcrossref = {},
OPTkey = {},
OPTpages = {},
year = {2003},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Napa Valley, USA},
month = oct,
OPTorganization = {},
OPTpublisher = {},
OPTnote = {},
OPTannote = {},
OPTURL = {},
KEYWORDS = "Sparse"
}
-
P. Hénon and Y. Saad.
PHIDAL: A Parallel Hierarchical Interface Decomposition Algorithm for solving sparse linear systems.
In Sparse Days and Grid Computing,
Saint Girons, France,
June 2003.
Keyword(s): Sparse.
@InProceedings{C:LaBRI::sparsedays2003ph,
author = {H\'enon, P. and Saad, Y.},
title = {PHIDAL: A Parallel Hierarchical Interface Decomposition Algorithm for solving sparse linear systems},
booktitle = {Sparse Days and Grid Computing},
OPTcrossref = {},
OPTkey = {},
OPTpages = {},
year = {2003},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Saint Girons, France},
month = jun,
OPTorganization = {},
OPTpublisher = {},
OPTnote = {},
OPTannote = {},
KEYWORDS = "Sparse"
}