Publications
List of journals and conference papers
Papers
2019 |
Panoc, Tomas; Meca, Ondrej; Brzobohaty, Tomas; Riha, Lubomir; Maly, Lukas An automatically generated graphical configuration tool for the massively parallel library ESPRESO Conference AIP Conference Proceedings, 2019. @conference{ICNAAM2018TP, title = {An automatically generated graphical configuration tool for the massively parallel library ESPRESO}, author = {Tomas Panoc and Ondrej Meca and Tomas Brzobohaty and Lubomir Riha and Lukas Maly}, url = {https://doi.org/10.1063/1.5114334}, doi = {10.1063/1.5114334}, year = {2019}, date = {2019-07-24}, booktitle = {AIP Conference Proceedings}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Riha, Lubomir; Merta, Michal; Vavrik, Radim; Brzobohaty, Tomas; Markopoulos, Alexandros; Meca, Ondrej; Vysocky, Ondrej; Kozubek, Tomas; Vondrak, Vit A massively parallel and memory-efficient FEM toolbox with a hybrid total FETI solver with accelerator support Journal Article INTERNATIONAL JOURNAL OF HIGH PERFORMANCE COMPUTING APPLICATIONS, 33 (4), pp. 660-677, 2019, ISSN: 1094-3420. @article{ISI:000471881700007, title = {A massively parallel and memory-efficient FEM toolbox with a hybrid total FETI solver with accelerator support}, author = {Lubomir Riha and Michal Merta and Radim Vavrik and Tomas Brzobohaty and Alexandros Markopoulos and Ondrej Meca and Ondrej Vysocky and Tomas Kozubek and Vit Vondrak}, doi = {10.1177/1094342018798452}, issn = {1094-3420}, year = {2019}, date = {2019-07-01}, journal = {INTERNATIONAL JOURNAL OF HIGH PERFORMANCE COMPUTING APPLICATIONS}, volume = {33}, number = {4}, pages = {660-677}, publisher = {SAGE PUBLICATIONS LTD}, address = {1 OLIVERS YARD, 55 CITY ROAD, LONDON EC1Y 1SP, ENGLAND}, abstract = {In this article, we present the ExaScale PaRallel finite element tearing and interconnecting SOlver (ESPRESO) finite element method (FEM) library, which includes an FEM toolbox with interfaces to professional and open-source simulation tools, and a massively parallel hybrid total finite element tearing and interconnecting (HTFETI) solver which can fully utilize the Oak Ridge Leadership Computing Facility Titan supercomputer and achieve superlinear scaling. This article presents several new techniques for finite element tearing and interconnecting (FETI) solvers designed for efficient utilization of supercomputers with a focus on (i) performance-we present a fivefold reduction of solver runtime for the Laplace equation by redesigning the FETI solver and offloading the key workload to the accelerator. 
We compare Intel Xeon Phi 7120p and Tesla K80 and P100 accelerators to Intel Xeon E5-2680v3 and Xeon Phi 7210 central processing units; and (ii) memory efficiency-we present two techniques which increase the efficiency of the HTFETI solver 1.8 times and push the limits of the largest possible problem ESPRESO that can solve from 124 to 223 billion unknowns for problems with unstructured meshes. Finally, we show that by dynamically tuning hardware parameters, we can reduce energy consumption by up to 33%.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this article, we present the ExaScale PaRallel finite element tearing and interconnecting SOlver (ESPRESO) finite element method (FEM) library, which includes an FEM toolbox with interfaces to professional and open-source simulation tools, and a massively parallel hybrid total finite element tearing and interconnecting (HTFETI) solver which can fully utilize the Oak Ridge Leadership Computing Facility Titan supercomputer and achieve superlinear scaling. This article presents several new techniques for finite element tearing and interconnecting (FETI) solvers designed for efficient utilization of supercomputers with a focus on (i) performance-we present a fivefold reduction of solver runtime for the Laplace equation by redesigning the FETI solver and offloading the key workload to the accelerator. We compare Intel Xeon Phi 7120p and Tesla K80 and P100 accelerators to Intel Xeon E5-2680v3 and Xeon Phi 7210 central processing units; and (ii) memory efficiency-we present two techniques which increase the efficiency of the HTFETI solver 1.8 times and push the limits of the largest possible problem ESPRESO that can solve from 124 to 223 billion unknowns for problems with unstructured meshes. Finally, we show that by dynamically tuning hardware parameters, we can reduce energy consumption by up to 33%. |
Dostal, Zdenek; Vlach, Oldrich; Brzobohaty, Tomas Scalable TFETI based algorithm with adaptive augmentation for contact problems with variationally consistent discretization of contact conditions Journal Article FINITE ELEMENTS IN ANALYSIS AND DESIGN, 156 , pp. 34-43, 2019, ISSN: 0168-874X. @article{ISI:000457631000004, title = {Scalable TFETI based algorithm with adaptive augmentation for contact problems with variationally consistent discretization of contact conditions}, author = {Zdenek Dostal and Oldrich Vlach and Tomas Brzobohaty}, doi = {10.1016/j.finel.2019.01.002}, issn = {0168-874X}, year = {2019}, date = {2019-04-01}, journal = {FINITE ELEMENTS IN ANALYSIS AND DESIGN}, volume = {156}, pages = {34-43}, publisher = {ELSEVIER SCIENCE BV}, address = {PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS}, abstract = {A variationally consistent approximation of contact conditions by means of biorthogonal mortars was introduced by Wohlmuth as a powerful theoretically supported tool for the discretization of contact problems. This approach is especially useful when a potential contact interface is large and curved or when nonmatching grids are applied, but its effective implementation into FETI based algorithms is not straightforward due to the ill conditioning of related inequality constraints. In this paper we review the mortar discretization and theoretical results on scalability of the FETI based algorithm and show that the recently proposed adaptive augmentation can overcome the difficulties caused by the ill-conditioning of constraints. 
We demonstrate the (weak) numerical scalability by numerical experiments and present the results for a difficult real world problem discretized by mortars that show the power of the new algorithm - the number of iterations required to the solution of this problem discretized by mortars is just one third of that required by the original algorithm for the same problem discretized by node-to-node constraints.}, keywords = {}, pubstate = {published}, tppubtype = {article} } A variationally consistent approximation of contact conditions by means of biorthogonal mortars was introduced by Wohlmuth as a powerful theoretically supported tool for the discretization of contact problems. This approach is especially useful when a potential contact interface is large and curved or when nonmatching grids are applied, but its effective implementation into FETI based algorithms is not straightforward due to the ill conditioning of related inequality constraints. In this paper we review the mortar discretization and theoretical results on scalability of the FETI based algorithm and show that the recently proposed adaptive augmentation can overcome the difficulties caused by the ill-conditioning of constraints. We demonstrate the (weak) numerical scalability by numerical experiments and present the results for a difficult real world problem discretized by mortars that show the power of the new algorithm - the number of iterations required to the solution of this problem discretized by mortars is just one third of that required by the original algorithm for the same problem discretized by node-to-node constraints. |
2018 |
Meca, Ondrej; Riha, Lubomir; Markopoulos, Alexandros; Brzobohaty, Tomas; Kozubek, Tomas Using ESPRESO as Linear Solver Library for Third Party FEM Tools for Solving Large Scale Problems Inproceedings Kozubek, T; Cermak, M; Tichy, P; Blaheta, R; Sistek, J; Lukas, D; Jaros, J (Ed.): HIGH PERFORMANCE COMPUTING IN SCIENCE AND ENGINEERING, HPCSE 2017, pp. 130-143, VSB Tech Univ Ostrava, IT4Innovat Natl Supercomputing Ctr SPRINGER INTERNATIONAL PUBLISHING AG, GEWERBESTRASSE 11, CHAM, CH-6330, SWITZERLAND, 2018, ISSN: 0302-9743, (3rd International Conference on High Performance Computing in Science and Engineering (HPCSE), Karolinka, CZECH REPUBLIC, MAY 22-25, 2017). @inproceedings{ISI:000469334300010, title = {Using ESPRESO as Linear Solver Library for Third Party FEM Tools for Solving Large Scale Problems}, author = {Ondrej Meca and Lubomir Riha and Alexandros Markopoulos and Tomas Brzobohaty and Tomas Kozubek}, editor = {T Kozubek and M Cermak and P Tichy and R Blaheta and J Sistek and D Lukas and J Jaros}, doi = {10.1007/978-3-319-97136-0_10}, issn = {0302-9743}, year = {2018}, date = {2018-01-01}, booktitle = {HIGH PERFORMANCE COMPUTING IN SCIENCE AND ENGINEERING, HPCSE 2017}, volume = {11087}, pages = {130-143}, publisher = {SPRINGER INTERNATIONAL PUBLISHING AG}, address = {GEWERBESTRASSE 11, CHAM, CH-6330, SWITZERLAND}, organization = {VSB Tech Univ Ostrava, IT4Innovat Natl Supercomputing Ctr}, series = {Lecture Notes in Computer Science}, abstract = {ESPRESO is a FEM package that includes a Hybrid Total FETI (HTFETI) linear solver targeted at solving large scale engineering problems. The scalability of the solver was tested on several of the world's largest supercomputers. To provide our scalable implementation of HTFETI algorithms to all potential users, a simple C API was developed and is presented. The paper describes API methods, compilation and linking process. 
As a proof of concept we interfaced ESPRESO with the CSC ELMER solver and compared its performance with the ELMER FETI solver. HTFETI performs two level decomposition, which significantly improves both memory utilization and solver performance. To select optimal second level decomposition we have developed a performance model that controls decomposition automatically. This is a major simplification for all users that ensures optimal solver settings. We show that the ESPRESO HTFETI solver is up to 3.7 times faster than the ELMER FETI solver when running on 13 500 MPI processes (the 614 compute nodes of the Salomon supercomputer) and solving 1.5 billion unknown problems of 3D linear elasticity.}, note = {3rd International Conference on High Performance Computing in Science and Engineering (HPCSE), Karolinka, CZECH REPUBLIC, MAY 22-25, 2017}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } ESPRESO is a FEM package that includes a Hybrid Total FETI (HTFETI) linear solver targeted at solving large scale engineering problems. The scalability of the solver was tested on several of the world's largest supercomputers. To provide our scalable implementation of HTFETI algorithms to all potential users, a simple C API was developed and is presented. The paper describes API methods, compilation and linking process. As a proof of concept we interfaced ESPRESO with the CSC ELMER solver and compared its performance with the ELMER FETI solver. HTFETI performs two level decomposition, which significantly improves both memory utilization and solver performance. To select optimal second level decomposition we have developed a performance model that controls decomposition automatically. This is a major simplification for all users that ensures optimal solver settings. 
We show that the ESPRESO HTFETI solver is up to 3.7 times faster than the ELMER FETI solver when running on 13 500 MPI processes (the 614 compute nodes of the Salomon supercomputer) and solving 1.5 billion unknown problems of 3D linear elasticity. |
Vavrik, R; Riha, L Acceleration techniques for FETI solvers for GPU accelerators Conference Proceedings - 2018 International Conference on High Performance Computing and Simulation, HPCS 2018, 2018. @conference{Vavrik2018500, title = {Acceleration techniques for FETI solvers for GPU accelerators}, author = {R Vavrik and L Riha}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85057415411&doi=10.1109%2fHPCS.2018.00091&partnerID=40&md5=ef573f95256a31330886595ef00711e5}, doi = {10.1109/HPCS.2018.00091}, year = {2018}, date = {2018-01-01}, booktitle = {Proceedings - 2018 International Conference on High Performance Computing and Simulation, HPCS 2018}, pages = {500-507}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Maly, L; Zapletal, J; Merta, M; Riha, L; Vondrak, V Evaluation of the Intel Xeon Phi offload runtimes for domain decomposition solvers Journal Article Advances in Engineering Software, 125 , pp. 146-154, 2018, (cited By 1). @article{Maly2018146, title = {Evaluation of the Intel Xeon Phi offload runtimes for domain decomposition solvers}, author = {L Maly and J Zapletal and M Merta and L Riha and V Vondrak}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85049557829&doi=10.1016%2fj.advengsoft.2018.06.011&partnerID=40&md5=46e9d69e105a262be50473f0241de773}, doi = {10.1016/j.advengsoft.2018.06.011}, year = {2018}, date = {2018-01-01}, journal = {Advances in Engineering Software}, volume = {125}, pages = {146-154}, note = {cited By 1}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Markopoulos, A; Říha, L; Brzobohatý, T; Meca, O; Kučera, R; Kozubek, T The HTFETI method variant gluing cluster subdomains by kernel matrices representing the rigid body motions Journal Article Lecture Notes in Computational Science and Engineering, 125 , pp. 543-551, 2018, (cited By 0). @article{Markopoulos2018543, title = {The HTFETI method variant gluing cluster subdomains by kernel matrices representing the rigid body motions}, author = {A Markopoulos and L Říha and T Brzobohatý and O Meca and R Kučera and T Kozubek}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85060284230&doi=10.1007%2f978-3-319-93873-8_52&partnerID=40&md5=35d768124a3c738f25cea8596651e085}, doi = {10.1007/978-3-319-93873-8_52}, year = {2018}, date = {2018-01-01}, journal = {Lecture Notes in Computational Science and Engineering}, volume = {125}, pages = {543-551}, note = {cited By 0}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2017 |
Merta, Michal; Riha, Lubomir; Meca, Ondrej; Markopoulos, Alexandros; Brzobohaty, Tomas; Kozubek, Tomas; Vondrak, Vit Intel Xeon Phi acceleration of Hybrid Total FETI solver Journal Article ADVANCES IN ENGINEERING SOFTWARE, 112 , pp. 124-135, 2017, ISSN: 0965-9978. @article{ISI:000405456900011, title = {Intel Xeon Phi acceleration of Hybrid Total FETI solver}, author = {Michal Merta and Lubomir Riha and Ondrej Meca and Alexandros Markopoulos and Tomas Brzobohaty and Tomas Kozubek and Vit Vondrak}, doi = {10.1016/j.advengsoft.2017.05.001}, issn = {0965-9978}, year = {2017}, date = {2017-10-01}, journal = {ADVANCES IN ENGINEERING SOFTWARE}, volume = {112}, pages = {124-135}, publisher = {ELSEVIER SCI LTD}, address = {THE BOULEVARD, LANGFORD LANE, KIDLINGTON, OXFORD OX5 1GB, OXON, ENGLAND}, abstract = {This paper describes an approach for acceleration of the Hybrid Total FETI (HTFETI) domain decomposition method using the Intel Xeon Phi coprocessors. The HTFETI method is a memory bound algorithm which uses sparse linear BLAS operations with irregular memory access pattern. The presented local Schur complement (LSC) method has regular memory access pattern, that allows the solver to fully utilize the Intel Xeon Phi fast memory bandwidth. This translates to speedup over 10.9 of the HTFETI iterative solver when solving 3 billion unknown heat transfer problem (3D Laplace equation) on almost 400 compute nodes. The comparison is between the CPU computation using sparse data structures (PARDISO sparse direct solver) and the LSC computation on Xeon Phi. In the case of the structural mechanics problem (3D linear elasticity) of size 1 billion DOFs the respective speedup is 3.4. The presented speedups are asymptotic and they are reached for problems requiring high number of iterations (e.g., ill-conditioned problems, transient problems, contact problems). For problems which can be solved with under hundred iterations the local Schur complement method is not optimal. 
For these cases we have implemented sparse matrix processing using PARDISO also for the Xeon Phi accelerators. (C) 2017 Elsevier Ltd. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper describes an approach for acceleration of the Hybrid Total FETI (HTFETI) domain decomposition method using the Intel Xeon Phi coprocessors. The HTFETI method is a memory bound algorithm which uses sparse linear BLAS operations with irregular memory access pattern. The presented local Schur complement (LSC) method has regular memory access pattern, that allows the solver to fully utilize the Intel Xeon Phi fast memory bandwidth. This translates to speedup over 10.9 of the HTFETI iterative solver when solving 3 billion unknown heat transfer problem (3D Laplace equation) on almost 400 compute nodes. The comparison is between the CPU computation using sparse data structures (PARDISO sparse direct solver) and the LSC computation on Xeon Phi. In the case of the structural mechanics problem (3D linear elasticity) of size 1 billion DOFs the respective speedup is 3.4. The presented speedups are asymptotic and they are reached for problems requiring high number of iterations (e.g., ill-conditioned problems, transient problems, contact problems). For problems which can be solved with under hundred iterations the local Schur complement method is not optimal. For these cases we have implemented sparse matrix processing using PARDISO also for the Xeon Phi accelerators. (C) 2017 Elsevier Ltd. All rights reserved. |
Říha, L; Brzobohatý, T; Markopoulos, A Hybrid parallelization of the total FETI solver Journal Article Advances in Engineering Software, 103 , pp. 29-37, 2017, (cited By 2). @article{Říha201729, title = {Hybrid parallelization of the total FETI solver}, author = {L Říha and T Brzobohatý and A Markopoulos}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84975679539&doi=10.1016%2fj.advengsoft.2016.04.004&partnerID=40&md5=a444dfabe3fd20847ac001d5da907ea2}, doi = {10.1016/j.advengsoft.2016.04.004}, year = {2017}, date = {2017-01-01}, journal = {Advances in Engineering Software}, volume = {103}, pages = {29-37}, note = {cited By 2}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Markopoulos, A; Říha, L; Brzobohatý, T; Jirůtková, P; Kučera, R; Meca, O; Kozubek, T Treatment of singular matrices in the hybrid total FETI method Journal Article Lecture Notes in Computational Science and Engineering, 116 , pp. 237-244, 2017, (cited By 1). @article{Markopoulos2017237, title = {Treatment of singular matrices in the hybrid total FETI method}, author = {A Markopoulos and L Říha and T Brzobohatý and P Jirůtková and R Kučera and O Meca and T Kozubek}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85016210689&doi=10.1007%2f978-3-319-52389-7_23&partnerID=40&md5=62fe0c8a0154663e092b04cf395c355c}, doi = {10.1007/978-3-319-52389-7_23}, year = {2017}, date = {2017-01-01}, journal = {Lecture Notes in Computational Science and Engineering}, volume = {116}, pages = {237-244}, note = {cited By 1}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Markopoulos, A; Kucera, R; Brzobohaty, T; Riha, L; Meca, O; Ryska, V; Kozubek, T HTFETI method for non-symmetric problems Journal Article Civil-Comp Proceedings, 111 , 2017, (cited By 0). @article{Markopoulos2017b, title = {HTFETI method for non-symmetric problems}, author = {A Markopoulos and R Kucera and T Brzobohaty and L Riha and O Meca and V Ryska and T Kozubek}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85020459434&partnerID=40&md5=757761bc024de5395f04e82d1922d5f9}, year = {2017}, date = {2017-01-01}, journal = {Civil-Comp Proceedings}, volume = {111}, note = {cited By 0}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2016 |
Riha, Lubomir; Brzobohaty, Tomas; Markopoulos, Alexandros; Jarosova, Marta; Kozubek, Tomas; Horak, David; Hapla, Vaclav Implementation of the efficient communication layer for the highly parallel total FETI and hybrid total FETI solvers Journal Article PARALLEL COMPUTING, 57 , pp. 154-166, 2016, ISSN: 0167-8191, (8th International Workshop on Parallel Matrix Algorithms and Applications (PMAA), Univ Svizzera Italiana, Lugano, SWITZERLAND, JUL 02-04, 2014). @article{ISI:000383307100012, title = {Implementation of the efficient communication layer for the highly parallel total FETI and hybrid total FETI solvers}, author = {Lubomir Riha and Tomas Brzobohaty and Alexandros Markopoulos and Marta Jarosova and Tomas Kozubek and David Horak and Vaclav Hapla}, doi = {10.1016/j.parco.2016.05.002}, issn = {0167-8191}, year = {2016}, date = {2016-09-01}, journal = {PARALLEL COMPUTING}, volume = {57}, pages = {154-166}, publisher = {ELSEVIER SCIENCE BV}, address = {PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS}, abstract = {This paper describes the implementation, performance, and scalability of our communication layer developed for Total FETI (TFETI) and Hybrid Total FETI (HTFETI) solvers. HTFETI is based on our variant of the Finite Element Tearing and Interconnecting (FETI) type domain decomposition method. In this approach a small number of neighboring subdomains is aggregated into clusters, which results in a smaller coarse problem. To solve the original problem TFETI method is applied twice: to the clusters and then to the subdomains in each cluster. The current implementation of the solver is focused on the performance optimization of the main CG iteration loop, including: implementation of communication hiding and avoiding techniques for global communications; optimization of the nearest neighbor communication - multiplication with a global gluing matrix; and optimization of the parallel CG algorithm to iterate over local Lagrange multipliers only. 
The performance is demonstrated on a linear elasticity 3D cube and real world benchmarks. (C) 2016 Elsevier B.V. All rights reserved.}, note = {8th International Workshop on Parallel Matrix Algorithms and Applications (PMAA), Univ Svizzera Italiana, Lugano, SWITZERLAND, JUL 02-04, 2014}, keywords = {}, pubstate = {published}, tppubtype = {article} } This paper describes the implementation, performance, and scalability of our communication layer developed for Total FETI (TFETI) and Hybrid Total FETI (HTFETI) solvers. HTFETI is based on our variant of the Finite Element Tearing and Interconnecting (FETI) type domain decomposition method. In this approach a small number of neighboring subdomains is aggregated into clusters, which results in a smaller coarse problem. To solve the original problem TFETI method is applied twice: to the clusters and then to the subdomains in each cluster. The current implementation of the solver is focused on the performance optimization of the main CG iteration loop, including: implementation of communication hiding and avoiding techniques for global communications; optimization of the nearest neighbor communication - multiplication with a global gluing matrix; and optimization of the parallel CG algorithm to iterate over local Lagrange multipliers only. The performance is demonstrated on a linear elasticity 3D cube and real world benchmarks. (C) 2016 Elsevier B.V. All rights reserved. |
Říha, L; Brzobohatý, T; Markopoulos, A; Meca, O; Kozubek, T Massively parallel hybrid total FETI (HTFETI) solver Conference PASC 2016 - Proceedings of the Platform for Advanced Scientific Computing Conference, 2016, (cited By 9). @conference{Ríha2016, title = {Massively parallel hybrid total FETI (HTFETI) solver}, author = {L Říha and T Brzobohatý and A Markopoulos and O Meca and T Kozubek}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84978743978&doi=10.1145%2f2929908.2929909&partnerID=40&md5=9cf3d1a3599b63c91dceb7fda4db11cd}, doi = {10.1145/2929908.2929909}, year = {2016}, date = {2016-01-01}, booktitle = {PASC 2016 - Proceedings of the Platform for Advanced Scientific Computing Conference}, note = {cited By 9}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
Říha, L; Brzobohatý, T; Markopoulos, A; Kozubek, T; Meca, O; Schenk, O; Vanroose, W Efficient implementation of total FETI solver for graphic processing units using Schur complement Journal Article Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 9611 , pp. 85-100, 2016, (cited By 5). @article{Říha201685, title = {Efficient implementation of total FETI solver for graphic processing units using Schur complement}, author = {L Říha and T Brzobohatý and A Markopoulos and T Kozubek and O Meca and O Schenk and W Vanroose}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84976649907&doi=10.1007%2f978-3-319-40361-8_6&partnerID=40&md5=b1a92b454d90e8dfd81eaeecac5c55c9}, doi = {10.1007/978-3-319-40361-8_6}, year = {2016}, date = {2016-01-01}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {9611}, pages = {85-100}, note = {cited By 5}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2015 |
Riha, Lubomir; Brzobohaty, Tomas; Markopoulos, Alexandros; Jarosova, Marta; Kozubek, Tomas Implementation of Hybrid Total FETI (HTFETI) Solver for Multi-core Architectures Inproceedings Simos, TE; Tsitouras, C (Ed.): PROCEEDINGS OF THE INTERNATIONAL CONFERENCE OF NUMERICAL ANALYSIS AND APPLIED MATHEMATICS 2014 (ICNAAM-2014), AMER INST PHYSICS, 2 HUNTINGTON QUADRANGLE, STE 1NO1, MELVILLE, NY 11747-4501 USA, 2015, ISSN: 0094-243X, (International Conference on Numerical Analysis and Applied Mathematics (ICNAAM), Rhodes, GREECE, SEP 22-28, 2014). @inproceedings{ISI:000355339705025, title = {Implementation of Hybrid Total FETI (HTFETI) Solver for Multi-core Architectures}, author = {Lubomir Riha and Tomas Brzobohaty and Alexandros Markopoulos and Marta Jarosova and Tomas Kozubek}, editor = {TE Simos and C Tsitouras}, doi = {10.1063/1.4913028}, issn = {0094-243X}, year = {2015}, date = {2015-01-01}, booktitle = {PROCEEDINGS OF THE INTERNATIONAL CONFERENCE OF NUMERICAL ANALYSIS AND APPLIED MATHEMATICS 2014 (ICNAAM-2014)}, volume = {1648}, publisher = {AMER INST PHYSICS}, address = {2 HUNTINGTON QUADRANGLE, STE 1NO1, MELVILLE, NY 11747-4501 USA}, series = {AIP Conference Proceedings}, abstract = {We describe the implementation and the performance and scalability results of a hybrid FETI (Finite Element Tearing and Interconnecting) solver based on our variant of the FETI type domain decomposition method called Total FETI. In our approach a small number of neighboring subdomains is aggregated into clusters, which results into a smaller coarse problem. To solve the original problem Total FETI method is applied twice: to the clusters and then to the subdomains in each cluster. 
Current implementation of the solver is focused on the performance optimization of the main CG iteration loop, including: implementation of communication hiding and avoiding techniques for global communications; optimization of the nearest neighbor communication - multiplication with global gluing matrix; and optimization of the parallel CG algorithm to iterate over local Lagrange multipliers only. The performance is demonstrated on a linear elasticity synthetic 3D cube and real world benchmarks.}, note = {International Conference on Numerical Analysis and Applied Mathematics (ICNAAM), Rhodes, GREECE, SEP 22-28, 2014}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We describe the implementation and the performance and scalability results of a hybrid FETI (Finite Element Tearing and Interconnecting) solver based on our variant of the FETI type domain decomposition method called Total FETI. In our approach a small number of neighboring subdomains is aggregated into clusters, which results into a smaller coarse problem. To solve the original problem Total FETI method is applied twice: to the clusters and then to the subdomains in each cluster. Current implementation of the solver is focused on the performance optimization of the main CG iteration loop, including: implementation of communication hiding and avoiding techniques for global communications; optimization of the nearest neighbor communication - multiplication with global gluing matrix; and optimization of the parallel CG algorithm to iterate over local Lagrange multipliers only. The performance is demonstrated on a linear elasticity synthetic 3D cube and real world benchmarks. |
Říha, L; Brzobohatý, T; Markopoulos, A Highly scalable FETI methods in ESPRESO Journal Article Civil-Comp Proceedings, 107 , 2015, (cited By 0). @article{Ríha2015, title = {Highly scalable FETI methods in ESPRESO}, author = {L Říha and T Brzobohatý and A Markopoulos}, url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84971647394&partnerID=40&md5=6f24664825fd7a2b4db35cf8f149773f}, year = {2015}, date = {2015-01-01}, journal = {Civil-Comp Proceedings}, volume = {107}, note = {cited By 0}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2014 |
Dostal, Zdenek; Brzobohaty, Tomas; Horak, David; Kozubek, Tomas; Vodstrcil, Petr On R-linear convergence of semi-monotonic inexact augmented Lagrangians for bound and equality constrained quadratic programming problems with application Journal Article COMPUTERS & MATHEMATICS WITH APPLICATIONS, 67 (3), pp. 515-526, 2014, ISSN: 0898-1221. @article{ISI:000331506500003, title = {On R-linear convergence of semi-monotonic inexact augmented Lagrangians for bound and equality constrained quadratic programming problems with application}, author = {Zdenek Dostal and Tomas Brzobohaty and David Horak and Tomas Kozubek and Petr Vodstrcil}, doi = {10.1016/j.camwa.2013.11.009}, issn = {0898-1221}, year = {2014}, date = {2014-02-01}, journal = {COMPUTERS \& MATHEMATICS WITH APPLICATIONS}, volume = {67}, number = {3}, pages = {515-526}, publisher = {PERGAMON-ELSEVIER SCIENCE LTD}, address = {THE BOULEVARD, LANGFORD LANE, KIDLINGTON, OXFORD OX5 1GB, ENGLAND}, abstract = {New convergence results for a variant of the inexact augmented Lagrangian algorithm SMALBE [Z. Dostal, An optimal algorithm for bound and equality constrained quadratic programming problems with bounded spectrum, Computing 78 (2006) 311-328] for the solution of strictly convex bound and equality constrained quadratic programming problems are presented. The algorithm SMALBE-M presented here uses a fixed regularization parameter and controls the precision of the solution of auxiliary bound constrained problems by a multiple of the norm of violation of the equality constraints and a constant which is updated in order to enforce the increase of Lagrangian function. A nice feature of SMALBE-M is its capability to find an approximate solution of important classes of problems in a number of iterations that is independent of the conditioning of the equality constraints. Here we prove the R-linear rate of convergence of the outer loop of SMALBE-M for any positive regularization parameter after the strong active constraints of the solution are identified. 
The theoretical results are illustrated by solving two benchmarks, including the contact problem of elasticity discretized by two million of nodal variables. The numerical experiments indicate that the inexact solution of auxiliary problems in the inner loop results in a very small increase of the number of outer iterations as compared with the exact algorithm. The results do not assume independent equality constraints and remain valid when the solution is dual degenerate. (C) 2013 Elsevier Ltd. All rights reserved.}, keywords = {}, pubstate = {published}, tppubtype = {article} } New convergence results for a variant of the inexact augmented Lagrangian algorithm SMALBE [Z. Dostal, An optimal algorithm for bound and equality constrained quadratic programming problems with bounded spectrum, Computing 78 (2006) 311-328] for the solution of strictly convex bound and equality constrained quadratic programming problems are presented. The algorithm SMALBE-M presented here uses a fixed regularization parameter and controls the precision of the solution of auxiliary bound constrained problems by a multiple of the norm of violation of the equality constraints and a constant which is updated in order to enforce the increase of Lagrangian function. A nice feature of SMALBE-M is its capability to find an approximate solution of important classes of problems in a number of iterations that is independent of the conditioning of the equality constraints. Here we prove the R-linear rate of convergence of the outer loop of SMALBE-M for any positive regularization parameter after the strong active constraints of the solution are identified. The theoretical results are illustrated by solving two benchmarks, including the contact problem of elasticity discretized by two million of nodal variables. 
The numerical experiments indicate that the inexact solution of auxiliary problems in the inner loop results in a very small increase of the number of outer iterations as compared with the exact algorithm. The results do not assume independent equality constraints and remain valid when the solution is dual degenerate. (C) 2013 Elsevier Ltd. All rights reserved. |
Markopoulos, A; Dostál, Z; Kozubek, T; Kovář, P; Brzobohatý, T; Kučera, R Stable computations of generalized inverses of positive semidefinite matrices Journal Article Lecture Notes in Computational Science and Engineering, 98 , pp. 909-916, 2014, (cited By 0).
@article{Markopoulos2014909,
  title     = {Stable computations of generalized inverses of positive semidefinite matrices},
  author    = {A Markopoulos and Z Dostál and T Kozubek and P Kovář and T Brzobohatý and R Kučera},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84910675454&doi=10.1007%2f978-3-319-05789-7__88&partnerID=40&md5=f9ef89f4563184b9df329314733ad76f},
  doi       = {10.1007/978-3-319-05789-7_88},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Lecture Notes in Computational Science and Engineering},
  volume    = {98},
  pages     = {909--916},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Jarošová, M; Brzobohatý, T; Markopoulos, A Two level FETI method for transient problems Conference 2014, (cited By 0).
@conference{Jarošová20147199b,
  title     = {Two level {FETI} method for transient problems},
  author    = {M Jarošová and T Brzobohatý and A Markopoulos},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84923974776&partnerID=40&md5=a1737ea9e912f05b67ffa1bfadaf25bf},
  year      = {2014},
  date      = {2014-01-01},
  booktitle = {11th World Congress on Computational Mechanics, WCCM 2014, 5th European Conference on Computational Mechanics, ECCM 2014 and 6th European Conference on Computational Fluid Dynamics, ECFD 2014},
  pages     = {7199--7206},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
|
2013 |
Lukáš, D; Sadowská, M; Kozubek, T; Markopoulos, A; Brzobohatý, T A Comparison of TFETI and TBETI for Numerical Solution of Engineering Problems of Contact Mechanics Journal Article Lecture Notes in Computational Science and Engineering, 91 , pp. 345-352, 2013, (cited By 0).
@article{Lukáš2013345,
  title     = {A Comparison of {TFETI} and {TBETI} for Numerical Solution of Engineering Problems of Contact Mechanics},
  author    = {D Lukáš and M Sadowská and T Kozubek and A Markopoulos and T Brzobohatý},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84880479906&doi=10.1007%2f978-3-642-35275-1_40&partnerID=40&md5=6415173c5df206f44f56fb1ecd8f5ef6},
  doi       = {10.1007/978-3-642-35275-1_40},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Lecture Notes in Computational Science and Engineering},
  volume    = {91},
  pages     = {345--352},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Kozubek, T; Dostál, Z; Brzobohatý, T; Markopoulos, A; Vlach, O TFETI Scalable Solvers for Transient Contact Problems Journal Article Lecture Notes in Computational Science and Engineering, 91 , pp. 329-336, 2013, (cited By 0).
@article{Kozubek2013329,
  title     = {{TFETI} Scalable Solvers for Transient Contact Problems},
  author    = {T Kozubek and Z Dostál and T Brzobohatý and A Markopoulos and O Vlach},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84880456671&doi=10.1007%2f978-3-642-35275-1_38&partnerID=40&md5=209fdfd8f9543fb7af02919c4406ad7f},
  doi       = {10.1007/978-3-642-35275-1_38},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Lecture Notes in Computational Science and Engineering},
  volume    = {91},
  pages     = {329--336},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Dostál, Z; Kozubek, T; Brzobohatý, T; Markopoulos, A; Sadowská, M; Vondrák, V Scalable Domain Decomposition Algorithms for Contact Problems: Theory, Numerical Experiments, and Real World Problems Journal Article Lecture Notes in Computational Science and Engineering, 91 , pp. 39-49, 2013, (cited By 0).
@article{Dostál201339b,
  title     = {Scalable Domain Decomposition Algorithms for Contact Problems: Theory, Numerical Experiments, and Real World Problems},
  author    = {Z Dostál and T Kozubek and T Brzobohatý and A Markopoulos and M Sadowská and V Vondrák},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84880471685&doi=10.1007%2f978-3-642-35275-1_4&partnerID=40&md5=40bd372b2a340f6dbf13e5ec23c92b2f},
  doi       = {10.1007/978-3-642-35275-1_4},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Lecture Notes in Computational Science and Engineering},
  volume    = {91},
  pages     = {39--49},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Dostál, Z; Kozubek, T; Brzobohatý, T; Markopoulos, A; Vondrák, V Scalable TFETI algorithm for frictionless contact problems: Theory and real world problems Journal Article Lecture Notes in Applied and Computational Mechanics, 56 LNACM , pp. 113-130, 2013, (cited By 0).
@article{Dostál2013113b,
  title     = {Scalable {TFETI} algorithm for frictionless contact problems: Theory and real world problems},
  author    = {Z Dostál and T Kozubek and T Brzobohatý and A Markopoulos and V Vondrák},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84870757857&doi=10.1007%2f978-3-642-33968-4_8&partnerID=40&md5=89e99528e03a64178d408c2a72f6284c},
  doi       = {10.1007/978-3-642-33968-4_8},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Lecture Notes in Applied and Computational Mechanics},
  volume    = {56},
  pages     = {113--130},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
2012 |
Markopoulos, A; Brzobohaty, T; Kozubek, T; Dostal, Z Total FETI method and singular matrices in engineering problems Conference 2012, (cited By 0).
@conference{Markopoulos20125411,
  title     = {Total {FETI} method and singular matrices in engineering problems},
  author    = {A Markopoulos and T Brzobohaty and T Kozubek and Z Dostal},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84871624449&partnerID=40&md5=987ef3dfdc907698312251b6de1c9941},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {ECCOMAS 2012 - European Congress on Computational Methods in Applied Sciences and Engineering, e-Book Full Papers},
  pages     = {5411--5419},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
|
Vlach, O; Dostál, Z; Kozubek, T; Brzobohatý, T On effective implementation of the non-penetration condition for non-matching grids preserving scalability of FETI based algorithms Conference 2012, (cited By 0).
@conference{Vlach20126691b,
  title     = {On effective implementation of the non-penetration condition for non-matching grids preserving scalability of {FETI} based algorithms},
  author    = {O Vlach and Z Dostál and T Kozubek and T Brzobohatý},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84871624422&partnerID=40&md5=3907cfa3b1c32b2b83e7549befb9216d},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {ECCOMAS 2012 - European Congress on Computational Methods in Applied Sciences and Engineering, e-Book Full Papers},
  pages     = {6691--6699},
  note      = {cited By 0},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {conference}
}
|
Dostal, Zdenek; Kozubek, Tomas; Brzobohaty, Tomas; Markopoulos, Alexandros; Vlach, Oldrich Scalable TFETI with optional preconditioning by conjugate projector for transient frictionless contact problems of elasticity Journal Article COMPUTER METHODS IN APPLIED MECHANICS AND ENGINEERING, 247 , pp. 37-50, 2012, ISSN: 0045-7825.
@article{ISI:000310944400003,
  title     = {Scalable {TFETI} with optional preconditioning by conjugate projector for transient frictionless contact problems of elasticity},
  author    = {Zdenek Dostal and Tomas Kozubek and Tomas Brzobohaty and Alexandros Markopoulos and Oldrich Vlach},
  doi       = {10.1016/j.cma.2012.08.003},
  issn      = {0045-7825},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {COMPUTER METHODS IN APPLIED MECHANICS AND ENGINEERING},
  volume    = {247},
  pages     = {37--50},
  publisher = {ELSEVIER SCIENCE SA},
  address   = {PO BOX 564, 1001 LAUSANNE, SWITZERLAND},
  abstract  = {The FETI based domain decomposition method is adapted to implement the time step of the Newmark scheme for the solution of dynamic contact problems without friction. If the ratio of the decomposition and discretization parameters is kept uniformly bounded, then the cost of the time step is proved to be proportional to the number of nodal variables. The algorithm uses our in a sense optimal MPRGP algorithm for the solution of strictly convex bound constrained quadratic programming problems with optional preconditioning by the conjugate projector to the subspace defined by the trace of the rigid body motions on the artificial subdomain interfaces. The proof of optimality combines the convergence theory of our MPRGP algorithm, the classical bounds on the spectrum of the mass and stiffness matrices, and our theory of the preconditioning by a conjugate projector for nonlinear problems. The results are confirmed by numerical solution of 2D and 3D dynamic contact problems. (C) 2012 Elsevier B.V. All rights reserved.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
The FETI based domain decomposition method is adapted to implement the time step of the Newmark scheme for the solution of dynamic contact problems without friction. If the ratio of the decomposition and discretization parameters is kept uniformly bounded, then the cost of the time step is proved to be proportional to the number of nodal variables. The algorithm uses our in a sense optimal MPRGP algorithm for the solution of strictly convex bound constrained quadratic programming problems with optional preconditioning by the conjugate projector to the subspace defined by the trace of the rigid body motions on the artificial subdomain interfaces. The proof of optimality combines the convergence theory of our MPRGP algorithm, the classical bounds on the spectrum of the mass and stiffness matrices, and our theory of the preconditioning by a conjugate projector for nonlinear problems. The results are confirmed by numerical solution of 2D and 3D dynamic contact problems. (C) 2012 Elsevier B.V. All rights reserved. |
2010 |
Dostál, Z; Kozubek, T; Horyl, P; Brzobohatý, T; Markopoulos, A A scalable TFETI algorithm for two-dimensional multibody contact problems with friction Journal Article Journal of Computational and Applied Mathematics, 235 (2), pp. 403-418, 2010, (cited By 20).
@article{Dostál2010403,
  title     = {A scalable {TFETI} algorithm for two-dimensional multibody contact problems with friction},
  author    = {Z Dostál and T Kozubek and P Horyl and T Brzobohatý and A Markopoulos},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-77955709399&doi=10.1016%2fj.cam.2010.05.042&partnerID=40&md5=da3c1dd50356f9b88bc46aac738cf8ac},
  doi       = {10.1016/j.cam.2010.05.042},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {Journal of Computational and Applied Mathematics},
  volume    = {235},
  number    = {2},
  pages     = {403--418},
  note      = {cited By 20},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Dostal, Zdenek; Brzobohaty, Tomas; Kozubek, Tomas; Markopoulos, Alex; Vondrak, Vit A Scalable TFETI Based Algorithm for 2D and 3D Frictionless Contact Problems Inproceedings Lirkov, I; Margenov, S; Wasniewski, J (Ed.): LARGE-SCALE SCIENTIFIC COMPUTING, pp. 92+, Bulgarian Acad Sci, Inst Parallel Proc SPRINGER-VERLAG BERLIN, HEIDELBERGER PLATZ 3, D-14197 BERLIN, GERMANY, 2010, ISSN: 0302-9743, (6th International Conference on Large-Scale Scientific Computing (LSSC 2009), Sozopol, BULGARIA, JUN 04-08, 2009).
@inproceedings{ISI:000278091900009,
  title        = {A Scalable {TFETI} Based Algorithm for {2D} and {3D} Frictionless Contact Problems},
  author       = {Zdenek Dostal and Tomas Brzobohaty and Tomas Kozubek and Alex Markopoulos and Vit Vondrak},
  editor       = {Lirkov, I. and Margenov, S. and Wasniewski, J.},
  doi          = {10.1007/978-3-642-12535-5_9},
  issn         = {0302-9743},
  year         = {2010},
  date         = {2010-01-01},
  booktitle    = {LARGE-SCALE SCIENTIFIC COMPUTING},
  volume       = {5910},
  pages        = {92+},
  publisher    = {SPRINGER-VERLAG BERLIN},
  address      = {HEIDELBERGER PLATZ 3, D-14197 BERLIN, GERMANY},
  organization = {Bulgarian Acad Sci, Inst Parallel Proc},
  series       = {Lecture Notes in Computer Science},
  abstract     = {We report our recent results in the development of theoretically supported scalable algorithms for the solution of large scale complex contact problems of elasticity. The algorithms combine the TFETI based domain decomposition method adapted to the solution of 2D and 3D frictionless multibody contact problems of elasticity with our in a sense optimal algorithms for the solution of the resulting quadratic programming problems. Rather surprisingly, the theoretical results are qualitatively the same as the classical results on scalability of FETI for the linear elliptic problems. The efficiency of the method is demonstrated by the results of numerical experiments with parallel solution of both coercive and semicoercive 2D and 3D contact problems.},
  note         = {6th International Conference on Large-Scale Scientific Computing (LSSC 2009), Sozopol, BULGARIA, JUN 04-08, 2009},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
We report our recent results in the development of theoretically supported scalable algorithms for the solution of large scale complex contact problems of elasticity. The algorithms combine the TFETI based domain decomposition method adapted to the solution of 2D and 3D frictionless multibody contact problems of elasticity with our in a sense optimal algorithms for the solution of the resulting quadratic programming problems. Rather surprisingly, the theoretical results are qualitatively the same as the classical results on scalability of FETI for the linear elliptic problems. The efficiency of the method is demonstrated by the results of numerical experiments with parallel solution of both coercive and semicoercive 2D and 3D contact problems. |