-
G. Pichon.
Utilisation de la compression Block Low-Rank pour accélérer un solveur direct creux supernodal.
In COMPAS 2017,
Sophia Antipolis, France,
June 2017.
Keyword(s): Low-rank compression.
@inproceedings{pichon:hal-01585660,
  title       = {Utilisation de la compression {Block Low-Rank} pour acc{\'e}l{\'e}rer un solveur direct creux supernodal},
  author      = {Pichon, G.},
  url         = {https://hal.inria.fr/hal-01585660},
  booktitle   = {{COMPAS} 2017},
  address     = {Sophia Antipolis, France},
  year        = {2017},
  month       = jun,
  keywords    = {Low-rank compression},
  pdf         = {https://hal.inria.fr/hal-01585660/file/blr.pdf},
  hal_id      = {hal-01585660},
  hal_version = {v1},
}
-
G. Pichon,
E. Darve,
M. Faverge,
P. Ramet,
and J. Roman.
Sparse Supernodal Solver Using Block Low-Rank Compression.
In 18th IEEE International Workshop on Parallel and Distributed Scientific and Engineering Computing (PDSEC 2017),
Orlando, United States,
June 2017.
Keyword(s): Low-rank compression.
@inproceedings{pichon:hal-01502215,
  title       = {Sparse Supernodal Solver Using {Block Low-Rank} Compression},
  author      = {Pichon, G. and Darve, E. and Faverge, M. and Ramet, P. and Roman, J.},
  url         = {https://hal.inria.fr/hal-01502215},
  booktitle   = {18th {IEEE} International Workshop on Parallel and Distributed Scientific and Engineering Computing ({PDSEC} 2017)},
  address     = {Orlando, United States},
  year        = {2017},
  month       = jun,
  keywords    = {Low-rank compression},
  pdf         = {https://hal.inria.fr/hal-01502215/file/blr-final.pdf},
  hal_id      = {hal-01502215},
  hal_version = {v1},
}
-
G. Pichon,
E. Darve,
M. Faverge,
P. Ramet,
and J. Roman.
Sparse Supernodal Solver Using Hierarchical Compression over Runtime System.
In SIAM Conference on Computational Science and Engineering,
Atlanta, USA,
February 2017.
Keyword(s): Sparse.
Abstract:
In this talk, we present the PaStiX sparse supernodal solver, using hierarchical compression to reduce the burden on large blocks appearing during the nested dissection process. We compare the numerical stability, and the performance in terms of memory consumption and time to solution of different approaches by selecting when the compression of the factorized matrix occurs. In order to improve the efficiency of the sparse update kernel for both BLR (block low rank) and HODLR (hierarchically off-diagonal low-rank), we investigate the BDLR (boundary distance low-rank) method to preselect rows and columns in the low-rank approximation algorithm. |
@inproceedings{C:LaBRI::siam2017a,
  author    = {Pichon, G. and Darve, E. and Faverge, M. and Ramet, P. and Roman, J.},
  title     = {Sparse Supernodal Solver Using Hierarchical Compression over Runtime System},
  booktitle = {{SIAM} Conference on Computational Science and Engineering},
  year      = {2017},
  address   = {Atlanta, USA},
  month     = feb,
  keywords  = {Sparse},
  abstract  = {In this talk, we present the PaStiX sparse supernodal solver, using hierarchical compression to reduce the burden on large blocks appearing during the nested dissection process. We compare the numerical stability, and the performance in terms of memory consumption and time to solution of different approaches by selecting when the compression of the factorized matrix occurs. In order to improve the efficiency of the sparse update kernel for both BLR (block low rank) and HODLR (hierarchically off-diagonal low-rank), we investigate the BDLR (boundary distance low-rank) method to preselect rows and columns in the low-rank approximation algorithm.},
}
-
G. Pichon,
E. Darve,
M. Faverge,
P. Ramet,
and J. Roman.
Sparse Supernodal Solver exploiting Low-Rankness Property.
In Sparse Days 2017,
Toulouse, France,
September 2017.
Keyword(s): Low-rank compression.
Abstract:
In this talk, we will present recent advances on PaStiX, a supernodal sparse direct solver, which has been enhanced by the introduction of Block Low-Rank compression. We will describe different strategies leading to memory consumption gain and/or time-to-solution reduction. Finally, the implementation on top of runtime systems (Parsec, StarPU), will be compared with the static scheduling used in previous experiments. |
@inproceedings{pichon:hal-01585622,
  title       = {Sparse Supernodal Solver exploiting Low-Rankness Property},
  author      = {Pichon, G. and Darve, E. and Faverge, M. and Ramet, P. and Roman, J.},
  url         = {https://hal.inria.fr/hal-01585622},
  booktitle   = {Sparse Days 2017},
  address     = {Toulouse, France},
  year        = {2017},
  month       = sep,
  keywords    = {Low-rank compression},
  pdf         = {https://hal.inria.fr/hal-01585622/file/S01E02-Pichon.pdf},
  hal_id      = {hal-01585622},
  hal_version = {v1},
  abstract    = {In this talk, we will present recent advances on PaStiX, a supernodal sparse direct solver, which has been enhanced by the introduction of Block Low-Rank compression. We will describe different strategies leading to memory consumption gain and/or time-to-solution reduction. Finally, the implementation on top of runtime systems (Parsec, StarPU), will be compared with the static scheduling used in previous experiments.},
}
-
G. Pichon,
M. Faverge,
and P. Ramet.
Exploiting Modern Manycore Architecture in Sparse Direct Solver with Runtime Systems.
In SIAM Conference on Computational Science and Engineering,
Atlanta, USA,
February 2017.
Keyword(s): Sparse.
Abstract:
Sparse direct solvers is a time consuming operation required by many scientific applications to simulate physical problems. By its important overall cost, many studies tried to optimize the time to solution of those solvers on multi-core and distributed architectures. More recently, many works have addressed heterogeneous architectures to exploit accelerators such as GPUs or Intel Xeon Phi with interesting speedup. Despite researches towards generic solutions to efficiently exploit those accelerators, their hardware evolution requires continual adaptation of the kernels running on those architectures. The recent Nvidia architectures, as Kepler, present a larger number of parallel units thus requiring more data to feed every computational units. A solution considered to supply enough computation has been to study problems with large number of small computations. The batched BLAS libraries proposed by Intel, Nvidia, or the University of Tennessee are examples of this solution. We discuss in this talk the use of the variable size batched matrix-matrix multiply to improve the performance of the PaStiX sparse direct solver. Indeed, this kernel suits the supernodal method of the solver, and the multiple updates of variable sizes that occur during the numerical factorization. Performance results on a spectrum of matrices with different properties will be presented. |
@inproceedings{C:LaBRI::siam2017b,
  author    = {Pichon, G. and Faverge, M. and Ramet, P.},
  title     = {Exploiting Modern Manycore Architecture in Sparse Direct Solver with Runtime Systems},
  booktitle = {{SIAM} Conference on Computational Science and Engineering},
  year      = {2017},
  address   = {Atlanta, USA},
  month     = feb,
  keywords  = {Sparse},
  abstract  = {Sparse direct solvers is a time consuming operation required by many scientific applications to simulate physical problems. By its important overall cost, many studies tried to optimize the time to solution of those solvers on multi-core and distributed architectures. More recently, many works have addressed heterogeneous architectures to exploit accelerators such as GPUs or Intel Xeon Phi with interesting speedup. Despite researches towards generic solutions to efficiently exploit those accelerators, their hardware evolution requires continual adaptation of the kernels running on those architectures. The recent Nvidia architectures, as Kepler, present a larger number of parallel units thus requiring more data to feed every computational units. A solution considered to supply enough computation has been to study problems with large number of small computations. The batched BLAS libraries proposed by Intel, Nvidia, or the University of Tennessee are examples of this solution. We discuss in this talk the use of the variable size batched matrix-matrix multiply to improve the performance of the PaStiX sparse direct solver. Indeed, this kernel suits the supernodal method of the solver, and the multiple updates of variable sizes that occur during the numerical factorization. Performance results on a spectrum of matrices with different properties will be presented.},
}
-
G. Pichon,
M. Faverge,
P. Ramet,
and J. Roman.
Impact of Blocking Strategies for Sparse Direct Solvers on Top of Generic Runtimes.
In SIAM Conference on Computational Science and Engineering,
Atlanta, USA,
February 2017.
Keyword(s): Sparse.
Abstract:
Among the preprocessing steps of a sparse direct solver, reordering and block symbolic factorization are two major steps to reach a suitable granularity for BLAS kernels efficiency and runtime management. In this talk, we present a reordering strategy to increase off-diagonal block sizes. It enhances BLAS kernels and allows to handle larger tasks, reducing runtime overhead. Finally, we will comment the resulting gain in the PaStiX solver implemented over StarPU and PaRSEC. |
@inproceedings{C:LaBRI::siam2017c,
  author    = {Pichon, G. and Faverge, M. and Ramet, P. and Roman, J.},
  title     = {Impact of Blocking Strategies for Sparse Direct Solvers on Top of Generic Runtimes},
  booktitle = {{SIAM} Conference on Computational Science and Engineering},
  year      = {2017},
  address   = {Atlanta, USA},
  month     = feb,
  keywords  = {Sparse},
  abstract  = {Among the preprocessing steps of a sparse direct solver, reordering and block symbolic factorization are two major steps to reach a suitable granularity for BLAS kernels efficiency and runtime management. In this talk, we present a reordering strategy to increase off-diagonal block sizes. It enhances BLAS kernels and allows to handle larger tasks, reducing runtime overhead. Finally, we will comment the resulting gain in the PaStiX solver implemented over StarPU and PaRSEC.},
}
-
M. Faverge,
S. Moustafa,
F. Févotte,
L. Plagne,
and P. Ramet.
Efficient Parallel Solution of the 3D Stationary Boltzmann Transport Equation for Diffusive Problems.
Research Report RR-9116,
Inria ; EDF Lab,
September 2017.
Keyword(s): Neutron.
@techreport{faverge:hal-01630208,
  title       = {Efficient Parallel Solution of the {3D} Stationary {Boltzmann} Transport Equation for Diffusive Problems},
  author      = {Faverge, M. and Moustafa, S. and F{\'e}votte, F. and Plagne, L. and Ramet, P.},
  url         = {https://hal.inria.fr/hal-01630208},
  type        = {Research Report},
  number      = {RR-9116},
  pages       = {22},
  institution = {{Inria ; EDF Lab}},
  year        = {2017},
  month       = sep,
  keywords    = {Neutron},
  pdf         = {https://hal.inria.fr/hal-01630208/file/RR-9116-1.pdf},
  hal_id      = {hal-01630208},
  hal_version = {v1},
}
-
G. Pichon,
E. Darve,
M. Faverge,
P. Ramet,
and J. Roman.
Sparse Supernodal Solver Using Block Low-Rank Compression.
Research Report RR-9022,
Inria Bordeaux Sud-Ouest,
January 2017.
Keyword(s): Low-rank compression.
@techreport{pichon:hal-01450732,
  title       = {Sparse Supernodal Solver Using {Block Low-Rank} Compression},
  author      = {Pichon, G. and Darve, E. and Faverge, M. and Ramet, P. and Roman, J.},
  url         = {https://hal.inria.fr/hal-01450732},
  type        = {Research Report},
  number      = {RR-9022},
  pages       = {24},
  institution = {{Inria Bordeaux Sud-Ouest}},
  year        = {2017},
  month       = jan,
  keywords    = {Low-rank compression},
  pdf         = {https://hal.inria.fr/hal-01450732/file/RR-9022.pdf},
  hal_id      = {hal-01450732},
  hal_version = {v1},
}
-
G. Pichon,
E. Darve,
M. Faverge,
P. Ramet,
and J. Roman.
Sparse Supernodal Solver Using Block Low-Rank Compression: design, performance and analysis.
Research Report RR-9130,
Inria Bordeaux Sud-Ouest,
December 2017.
Keyword(s): Low-rank compression.
@techreport{pichon:hal-01660665,
  title       = {Sparse Supernodal Solver Using {Block Low-Rank} Compression: design, performance and analysis},
  author      = {Pichon, G. and Darve, E. and Faverge, M. and Ramet, P. and Roman, J.},
  url         = {https://hal.inria.fr/hal-01660665},
  type        = {Research Report},
  number      = {RR-9130},
  pages       = {1--32},
  institution = {{Inria Bordeaux Sud-Ouest}},
  year        = {2017},
  month       = dec,
  keywords    = {Low-rank compression},
  pdf         = {https://hal.inria.fr/hal-01660665/file/RR-9130.pdf},
  hal_id      = {hal-01660665},
  hal_version = {v1},
}