-
R. Abgrall,
O. Coulaud,
P. Hénon,
R. Huart,
G. Huysmans,
G. Latu,
B. Nkonga,
S. Pamela,
and P. Ramet.
Numerical simulation of tokamak plasmas.
In 7th PAMIR International Conference on Fundamental and Applied MHD,
Presqu'île de Giens, France,
September 2008.
Keyword(s): Fusion.
@InProceedings{C:LaBRI::fusion2008,
author = {Abgrall, R. and Coulaud, O. and H\'enon, P. and Huart, R. and Huysmans, G. and Latu, G. and Nkonga, B. and Pamela, S. and Ramet, P.},
title = {Numerical simulation of tokamak plasmas},
OPTcrossref = {},
OPTkey = {},
booktitle = {7th PAMIR International Conference on Fundamental and Applied MHD},
OPTpages = {},
year = {2008},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Presqu'\^{\i}le de Giens, France},
month = sep,
OPTorganization = {},
OPTpublisher = {},
URL = {http://www.labri.fr/~ramet/restricted/pamir.pdf},
KEYWORDS = "Fusion",
OPTannote = {}
}
-
M. Barrault,
B. Lathuilière,
P. Ramet,
and J. Roman.
A Domain Decomposition Method Applied to Large Eigenvalue Problems in Neutron Physics.
In Proceedings of PMAA'2008,
Neuchâtel, Switzerland,
June 2008.
Keyword(s): Neutron.
Abstract:
The simulation of neutron transport inside a nuclear reactor leads to the computation of the lowest eigenpair of a simplified transport operator. This computation is done by an inverse power algorithm accelerated by a Chebyshev polynomial based process. At each iteration, a large linear system is solved inexactly by a block Gauss-Seidel algorithm. For our applications, one Gauss-Seidel iteration is already sufficient to ensure the convergence of the inverse power algorithm. For the approximate solution of the linear system at each inverse power iteration, we propose a non-overlapping domain decomposition based on the introduction of Lagrange multipliers in order to: obtain a parallel algorithm, which circumvents the memory consumption problem and reduces the computational time; deal with different numerical approximations in each subdomain; and minimize the code modifications in our industrial solver. When the Chebyshev acceleration process is switched off, the method performs well on up to 100 processors for an industrial test case. It exhibits good efficiency, which allows us to carry out computations beyond the reach of standard workstations. We also study the efficiency of the Chebyshev acceleration process in our domain decomposition method.
@InProceedings{C:LaBRI::PMAA2008a,
author = "Barrault, M. and Lathuili\`ere, B. and Ramet, P. and Roman, J.",
title = "A Domain Decomposition Method Applied to Large Eigenvalue Problems in Neutron Physics",
booktitle = "Proceedings of {PMAA}'2008",
OPTcrossref = {},
OPTkey = {},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
year = "2008",
OPTorganization = {},
OPTpublisher = {},
address = {Neuch\^atel, Switzerland},
month = jun,
OPTpages = {},
OPTnote = {},
OPTannote = {},
OPTURL = {},
KEYWORDS = "Neutron",
ABSTRACT = {The simulation of neutron transport inside a nuclear reactor leads to the computation of the lowest eigenpair of a simplified transport operator. This computation is done by an inverse power algorithm accelerated by a Chebyshev polynomial based process. At each iteration, a large linear system is solved inexactly by a block Gauss-Seidel algorithm. For our applications, one Gauss-Seidel iteration is already sufficient to ensure the convergence of the inverse power algorithm. For the approximate solution of the linear system at each inverse power iteration, we propose a non-overlapping domain decomposition based on the introduction of Lagrange multipliers in order to: obtain a parallel algorithm, which circumvents the memory consumption problem and reduces the computational time; deal with different numerical approximations in each subdomain; and minimize the code modifications in our industrial solver. When the Chebyshev acceleration process is switched off, the method performs well on up to 100 processors for an industrial test case. It exhibits good efficiency, which allows us to carry out computations beyond the reach of standard workstations. We also study the efficiency of the Chebyshev acceleration process in our domain decomposition method.}
}
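Note: the abstract above describes an inverse power iteration whose inner linear systems are solved only inexactly by Gauss-Seidel sweeps. The short NumPy sketch below illustrates that idea on a small dense SPD matrix; it is only a toy model under stated assumptions (dense storage, a hand-made test matrix, illustrative function names and tolerances), not the authors' industrial solver, and the Chebyshev acceleration is left out.

import numpy as np

def gauss_seidel(A, b, x0, sweeps):
    # A few forward Gauss-Seidel sweeps for A x = b, starting from x0.
    x = x0.copy()
    n = A.shape[0]
    for _ in range(sweeps):
        for i in range(n):
            s = A[i, :i] @ x[:i] + A[i, i + 1:] @ x[i + 1:]
            x[i] = (b[i] - s) / A[i, i]
    return x

def inexact_inverse_power(A, outer=100, inner_sweeps=2, tol=1e-10):
    # Inverse power iteration for the lowest eigenpair of a symmetric
    # positive definite matrix A. Each inner system A y = x is solved only
    # approximately by a few Gauss-Seidel sweeps, warm-started from the
    # previous iterate (the abstract reports that a single block sweep
    # suffices in the authors' industrial setting).
    rng = np.random.default_rng(0)
    x = rng.standard_normal(A.shape[0])
    x /= np.linalg.norm(x)
    y = x.copy()
    lam = x @ A @ x
    for _ in range(outer):
        y = gauss_seidel(A, x, y, inner_sweeps)   # inexact solve of A y = x
        x = y / np.linalg.norm(y)
        lam = x @ A @ x                           # Rayleigh quotient estimate
        if np.linalg.norm(A @ x - lam * x) <= tol * abs(lam):
            break
    return lam, x

if __name__ == "__main__":
    # Small SPD test matrix with well-separated lowest eigenvalues.
    n = 30
    A = np.diag(np.arange(1.0, n + 1)) \
        + 0.1 * (np.diag(np.ones(n - 1), 1) + np.diag(np.ones(n - 1), -1))
    lam, v = inexact_inverse_power(A)
    print(lam, np.linalg.eigvalsh(A)[0])          # the two values should agree

With the well-separated test spectrum used here, a couple of warm-started Gauss-Seidel sweeps per outer iteration are enough for the iteration to converge to the lowest eigenpair.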
-
M. Barrault,
B. Lathuilière,
P. Ramet,
and J. Roman.
A domain decomposition method applied to the simplified transport equations.
In IEEE 11th International Conference on Computational Science and Engineering,
São Paulo, Brazil,
pages 91-97,
July 2008.
Keyword(s): Neutron.
Abstract:
The simulation of neutron transport inside a nuclear reactor leads to the computation of the lowest eigenpair of a simplified transport operator. Whereas the sequential solver at our disposal today is very efficient, we are not able to run some industrial cases due to memory consumption and computational time. This problem leads us to study parallel strategies. In order to reuse a significant part of the solver and to bypass some limitations of conforming Cartesian meshes, we propose a non-overlapping domain decomposition based on the introduction of Lagrange multipliers. The method performs well on up to 100 processors for an industrial test case.
@InProceedings{C:LaBRI::Neutron2008,
author = {Barrault, M. and Lathuili\`ere, B. and Ramet, P. and Roman, J.},
title = {A domain decomposition method applied to the simplified transport equations},
OPTcrossref = {},
OPTkey = {},
booktitle = {IEEE 11th International Conference on Computational Science and Engineering},
pages = {91--97},
year = {2008},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {S\~{a}o Paulo, Brazil},
month = jul,
OPTorganization = {},
OPTpublisher = {},
URL = {http://www.labri.fr/~ramet/restricted/cse08.pdf},
ABSTRACT = {The simulation of neutron transport inside a nuclear reactor leads to the computation of the lowest eigenpair of a simplified transport operator. Whereas the sequential solver at our disposal today is very efficient, we are not able to run some industrial cases due to memory consumption and computational time. This problem leads us to study parallel strategies. In order to reuse a significant part of the solver and to bypass some limitations of conforming Cartesian meshes, we propose a non-overlapping domain decomposition based on the introduction of Lagrange multipliers. The method performs well on up to 100 processors for an industrial test case.},
KEYWORDS = "Neutron",
OPTannote = {}
}
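Note: to make the non-overlapping domain decomposition with Lagrange multipliers mentioned in the abstract concrete, the NumPy sketch below couples two subdomains of a 1D Poisson model problem through a single multiplier that enforces continuity at the interface. This is only a toy illustration under simple assumptions (1D linear finite elements, two subdomains, a dense direct solve of the saddle-point system); it is not the discretization or solver used in the papers.

import numpy as np

def subdomain_stiffness(m, h, interface_last):
    # 1D linear finite element stiffness matrix for a subdomain with m
    # unknowns; the interface node (first or last unknown) only receives
    # the contribution of the single adjacent element inside the subdomain.
    K = (1.0 / h) * (2.0 * np.eye(m) - np.eye(m, k=1) - np.eye(m, k=-1))
    if interface_last:
        K[-1, -1] = 1.0 / h
    else:
        K[0, 0] = 1.0 / h
    return K

m = 20                           # unknowns per subdomain
N = 2 * m                        # elements on [0, 1]
h = 1.0 / N
# Model problem: -u'' = 1 on (0, 1), u(0) = u(1) = 0.
x1 = h * np.arange(1, m + 1)     # subdomain 1 unknowns: nodes 1..m (interface = node m)
x2 = h * np.arange(m, 2 * m)     # subdomain 2 unknowns: nodes m..2m-1 (interface first)
K1 = subdomain_stiffness(m, h, interface_last=True)
K2 = subdomain_stiffness(m, h, interface_last=False)
f1 = h * np.ones(m); f1[-1] *= 0.5   # the interface node gets half its load from each side
f2 = h * np.ones(m); f2[0] *= 0.5

# Signed jump operators: B1 u1 + B2 u2 = u1_interface - u2_interface = 0.
B1 = np.zeros((1, m)); B1[0, -1] = 1.0
B2 = np.zeros((1, m)); B2[0, 0] = -1.0

# Saddle-point system [K1 0 B1^T; 0 K2 B2^T; B1 B2 0]; the Lagrange
# multiplier plays the role of the flux exchanged at the interface.
Z = np.zeros((m, m))
S = np.block([[K1, Z,  B1.T],
              [Z,  K2, B2.T],
              [B1, B2, np.zeros((1, 1))]])
rhs = np.concatenate([f1, f2, [0.0]])
sol = np.linalg.solve(S, rhs)
u1, u2, lam = sol[:m], sol[m:2 * m], sol[-1]

exact = lambda x: 0.5 * x * (1.0 - x)     # exact solution of the model problem
print("interface values:", u1[-1], u2[0])                 # both equal u(0.5) = 0.125
print("max nodal error:", max(np.abs(u1 - exact(x1)).max(),
                              np.abs(u2 - exact(x2)).max()))

Because the duplicated interface unknown is tied back together by the constraint, the saddle-point system reproduces the global discrete problem, and the interface values returned by the two subdomains coincide.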
-
M. Barrault,
B. Lathuilière,
P. Ramet,
and J. Roman.
A domain decomposition method for the resolution of an eigenvalue problem in neutron physics.
In International Symposium on Iterative Methods in Scientific Computing (IMACS),
Lille, France,
March 2008.
Keyword(s): Neutron.
@InProceedings{C:LaBRI::neutron2008,
author = {Barrault, M. and Lathuili\`ere, B. and Ramet, P. and Roman, J.},
title = {A domain decomposition method for the resolution of an eigenvalue problem in neutron physics},
OPTcrossref = {},
OPTkey = {},
booktitle = {International Symposium on Iterative Methods in Scientific Computing (IMACS)},
OPTpages = {},
year = {2008},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Lille, France},
month = mar,
OPTorganization = {},
OPTpublisher = {},
URL = {http://www.labri.fr/~ramet/restricted/imacs.pdf},
KEYWORDS = "Neutron",
OPTannote = {}
}
-
Y. Caniou,
J.-S. Gay,
and P. Ramet.
Tunable parallel experiments in a GridRPC framework: application to linear solvers.
In VECPAR'08, 8th International Meeting on High Performance Computing for Computational Science,
volume 5336 of LNCS,
Toulouse, France,
pages 430-436,
June 2008.
Springer-Verlag.
Abstract:
Using scientific computing centers is becoming more and more difficult on modern parallel architectures. Users must face a large variety of batch systems (each with its own specific syntax) and have to set many parameters to tune their applications (e.g., processor and/or thread mapping, memory resource constraints). Moreover, finding the optimal performance is not the only criterion when a pool of jobs is submitted on the Grid (for numerical parametric analysis, for instance), and one must focus on the wall-time completion. In this work, we tackle the problem by using the DIET Grid middleware, which integrates an adaptable PaStiX service to solve a set of experiments issued from the simulations of the ASTER project.
@InProceedings{C:LaBRI::vecpar08-diet,
author = {Caniou, Y. and Gay, J.-S. and Ramet, P.},
title = {Tunable parallel experiments in a GridRPC framework: application to linear solvers},
OPTcrossref = {},
OPTkey = {},
booktitle = {VECPAR'08, 8th International Meeting on High Performance Computing for Computational Science},
pages = {430--436},
year = {2008},
OPTeditor = {},
volume = {5336},
OPTnumber = {},
series = {LNCS},
address = {Toulouse, France},
month = jun,
OPTorganization = {},
publisher = {Springer-Verlag},
OPTannote = {},
URL = {http://www.labri.fr/~ramet/restricted/vecpar08-diet.pdf},
ABSTRACT = {Using scientific computing centers is becoming more and more difficult on modern parallel architectures. Users must face a large variety of batch systems (each with its own specific syntax) and have to set many parameters to tune their applications (e.g., processor and/or thread mapping, memory resource constraints). Moreover, finding the optimal performance is not the only criterion when a pool of jobs is submitted on the Grid (for numerical parametric analysis, for instance), and one must focus on the wall-time completion. In this work, we tackle the problem by using the DIET Grid middleware, which integrates an adaptable PaStiX service to solve a set of experiments issued from the simulations of the ASTER project.}
}
-
M. Faverge,
X. Lacoste,
and P. Ramet.
A NUMA Aware Scheduler for a Parallel Sparse Direct Solver.
In Proceedings of PMAA'2008,
Neuchâtel, Switzerland,
June 2008.
Keyword(s): Sparse.
Abstract:
Over the past few years, parallel sparse direct solvers have made significant progress and are now able to solve efficiently industrial three-dimensional problems with several million unknowns. A hybrid MPI-thread implementation of our direct solver PaStiX is already well suited for SMP nodes and new multi-core architectures; it drastically reduces the memory overhead and improves scalability. In the context of distributed NUMA architectures, a dynamic scheduler based on a work-stealing algorithm has been developed to fill communication idle times. On these architectures, it is important to take care of NUMA effects and to preserve memory affinity during work-stealing. The scheduling of communications also needs to be adapted, especially to ensure their overlap with computations. Experiments on numerical test cases will be presented to demonstrate the efficiency of the approach on NUMA architectures. If memory is not large enough for a given problem, disks must be used to store the data that cannot fit in memory (out-of-core storage). The idle times due to disk accesses have to be managed by our dynamic scheduler to prefetch and save datasets. Thus, we design and study specific scheduling algorithms in this particular context.
@InProceedings{C:LaBRI::PMAA2008b,
author = "Faverge, M. and Lacoste, X. and Ramet, P.",
title = "A NUMA Aware Scheduler for a Parallel Sparse Direct Solver",
booktitle = "Proceedings of {PMAA}'2008",
OPTcrossref = {},
OPTkey = {},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
year = "2008",
OPTorganization = {},
OPTpublisher = {},
address = {Neuch\^atel, Switzerland},
month = jun,
OPTpages = {},
OPTnote = {},
OPTannote = {},
OPTURL = {},
KEYWORDS = "Sparse",
ABSTRACT = {Over the past few years, parallel sparse direct solvers have made significant progress and are now able to solve efficiently industrial three-dimensional problems with several million unknowns. A hybrid MPI-thread implementation of our direct solver PaStiX is already well suited for SMP nodes and new multi-core architectures; it drastically reduces the memory overhead and improves scalability. In the context of distributed NUMA architectures, a dynamic scheduler based on a work-stealing algorithm has been developed to fill communication idle times. On these architectures, it is important to take care of NUMA effects and to preserve memory affinity during work-stealing. The scheduling of communications also needs to be adapted, especially to ensure their overlap with computations. Experiments on numerical test cases will be presented to demonstrate the efficiency of the approach on NUMA architectures. If memory is not large enough for a given problem, disks must be used to store the data that cannot fit in memory (out-of-core storage). The idle times due to disk accesses have to be managed by our dynamic scheduler to prefetch and save datasets. Thus, we design and study specific scheduling algorithms in this particular context.}
}
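Note: the abstract above refers to a dynamic scheduler based on work-stealing that preserves memory affinity on NUMA architectures. The Python sketch below only illustrates the scheduling pattern: each worker owns a deque, runs its own tasks LIFO for locality, and steals the oldest task FIFO, preferring victims on the same (pretend) NUMA node. The node grouping, task counts and class names are illustrative assumptions; a real implementation such as the one in PaStiX would bind threads and memory to NUMA nodes and overlap MPI communication, which this toy example does not attempt.

import random
import threading
from collections import deque

class Worker(threading.Thread):
    # A worker thread with its own task deque. Local tasks are popped LIFO
    # (better data locality); thieves take the oldest task FIFO.
    def __init__(self, wid, node, workers):
        super().__init__()
        self.wid = wid
        self.node = node                   # pretend NUMA node identifier
        self.workers = workers             # shared list of all workers
        self.tasks = deque()
        self.lock = threading.Lock()
        self.executed = 0

    def push(self, task):
        with self.lock:
            self.tasks.append(task)

    def pop_local(self):
        with self.lock:
            return self.tasks.pop() if self.tasks else None

    def steal(self):
        with self.lock:
            return self.tasks.popleft() if self.tasks else None

    def victims(self):
        # Prefer victims on the same (pretend) NUMA node to preserve
        # memory affinity, then fall back to remote nodes.
        others = [w for w in self.workers if w is not self]
        random.shuffle(others)
        return sorted(others, key=lambda w: w.node != self.node)

    def run(self):
        failures = 0
        while failures < 1000:             # give up after enough empty scans
            task = self.pop_local()
            if task is None:
                for victim in self.victims():
                    task = victim.steal()
                    if task is not None:
                        break
            if task is None:
                failures += 1
                continue
            failures = 0
            task()
            self.executed += 1

if __name__ == "__main__":
    workers = []
    workers += [Worker(i, node=i // 2, workers=workers) for i in range(4)]
    for _ in range(400):                   # load every task on worker 0 to force stealing
        workers[0].push(lambda: sum(range(2000)))
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print("tasks executed per worker:", [w.executed for w in workers])

Running it shows that workers 1 to 3 end up executing a share of the tasks initially placed on worker 0, which is the load-balancing effect work-stealing is meant to provide.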
-
M. Faverge and P. Ramet.
Dynamic Scheduling for sparse direct Solver on NUMA architectures.
In Proceedings of PARA'2008,
Trondheim, Norway,
May 2008.
Keyword(s): Sparse.
Abstract:
Over the past few years, parallel sparse direct solvers have made significant progress and are now able to work efficiently on problems with several million equations. This paper presents some improvements to our sparse direct solver PaStiX for distributed Non-Uniform Memory Access architectures. We show results on two preliminary works: a memory allocation scheme better adapted to these architectures and a better overlap of communication with computation. We also present a dynamic scheduler that takes care of memory affinity and data locality.
@InProceedings{C:LaBRI::para08,
author = {Faverge, M. and Ramet, P.},
title = {Dynamic Scheduling for sparse direct Solver on NUMA architectures},
OPTcrossref = {},
OPTkey = {},
booktitle = {Proceedings of PARA'2008},
OPTpages = {},
year = {2008},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Trondheim, Norway},
month = may,
OPTorganization = {},
OPTpublisher = {},
OPTannote = {},
KEYWORDS = "Sparse",
ABSTRACT = {Over the past few years, parallel sparse direct solvers have made significant progress and are now able to work efficiently on problems with several million equations. This paper presents some improvements to our sparse direct solver PaStiX for distributed Non-Uniform Memory Access architectures. We show results on two preliminary works: a memory allocation scheme better adapted to these architectures and a better overlap of communication with computation. We also present a dynamic scheduler that takes care of memory affinity and data locality.},
URL = {http://www.labri.fr/~ramet/restricted/para08.pdf},
}
-
G. Huysmans,
R. Abgrall,
M. Becoulet,
R. Huart,
B. Nkonga,
S. Pamela,
and P. Ramet.
Non-Linear MHD code development for ELM simulations.
In Poster session, 35th EPS Plasma Physics Conference,
Hersonissos, Greece,
June 2008.
Keyword(s): Fusion.
@InProceedings{C:LaBRI::EPS2008,
author = {Huysmans, G. and Abgrall, R. and Becoulet, M. and Huart, R. and Nkonga, B. and Pamela, S. and Ramet, P.},
title = {Non-Linear {MHD} code development for {ELM} simulations},
OPTcrossref = {},
OPTkey = {},
booktitle = {Poster session, 35th {EPS} Plasma Physics Conference},
OPTpages = {},
year = {2008},
OPTeditor = {},
OPTvolume = {},
OPTnumber = {},
OPTseries = {},
address = {Hersonissos, Greece},
month = jun,
OPTorganization = {},
OPTpublisher = {},
URL = {http://www.labri.fr/~ramet/restricted/eps08.pdf},
KEYWORDS = "Fusion",
OPTannote = {}
}
-
N. Kushida,
Y. Suzuki,
N. Teshima,
N. Nakajima,
Y. Caniou,
M. Dayde,
and P. Ramet.
Toward an International Sparse Linear Algebra Expert System by Interconnecting the ITBL Computational Grid with the Grid-TLSE Platform.
In VECPAR'08, 8th International Meeting on High Performance Computing for Computational Science,
volume 5336 of LNCS,
Toulouse, France,
pages 424-429,
June 2008.
Springer-Verlag.
@InProceedings{C:LaBRI::vecpar08-redimps,
author = {Kushida, N. and Suzuki, Y. and Teshima, N. and Nakajima, N. and Caniou, Y. and Dayde, M. and Ramet, P.},
title = {Toward an International Sparse Linear Algebra Expert System by Interconnecting the ITBL Computational Grid with the Grid-TLSE Platform},
OPTcrossref = {},
OPTkey = {},
booktitle = {VECPAR'08, 8th International Meeting on High Performance Computing for Computational Science},
pages = {424--429},
year = {2008},
OPTeditor = {},
volume = {5336},
OPTnumber = {},
series = {LNCS},
address = {Toulouse, France},
month = jun,
OPTorganization = {},
publisher = {Springer-Verlag},
OPTannote = {},
URL = {http://www.labri.fr/~ramet/restricted/vecpar08-redimps.pdf},
}