author = "Vasconcellos, Eduardo C. and Clua, Esteban W. G. and Rosa, 
                         Reinaldo Roberto and Gazolla, Jo{\~a}o G. F. M. and Ferreira, 
                         Nuno C{\'e}sar da R. and Carlquist, Victor and Costa, Carlos F. 
                         da Silva",
          affiliation = "{Universidade Federal Fluminense (UFF)} and {Universidade Federal 
                         Fluminense (UFF)} and {Instituto Nacional de Pesquisas Espaciais 
                         (INPE)} and {Universidade Federal Fluminense (UFF)} and {Instituto 
                         Nacional de Pesquisas Espaciais (INPE)} and {Instituto Federal de 
                         Educa{\c{c}}{\~a}o, Ci{\^e}ncia e Tecnologia de S{\~a}o Paulo} 
                         and {University of Florida}",
                title = "GPU optimization for data analysis of Mario Schenberg spherical 
                         detector",
              journal = "Procedia Computer Science",
                 year = "2016",
               volume = "80",
                pages = "2158--2168",
                  month = jun,
                 note = "International Conference on Computational Science 2016, ICCS 2016, 
                         6--8 June 2016, San Diego, California, USA",
             keywords = "GPU Computing, Many Matrix Operations, Gravitational Waves, 
                         Astrophysical Events, Astrophysical Data Analysis",
             abstract = "The Gravitational Wave (GW) detectors, advanced LIGO and advanced 
                         Virgo, are acquiring the potential for recording unprecedented 
                         astronomic data for astrophysical events. The Mario Schenberg 
                         detector (MSD) is a smaller scale experiment that could 
                         participate to this search. Previously, we developed a first data 
                         analysis pipeline (DAP) to transform the detector's signal into 
                         relevant GW information. This pipeline was extremely simplified in 
                         order to be executed in low-latency. In order to improve the 
                         analysis methods while keeping a low execution time, we propose 
                         three different parallel approaches using GPU/CUDA. We implemented 
                         the parallel models using cuBLAS library functions and enhance its 
                         capability with asynchronous processes in CUDA streams. Our novel 
                         model achieves performances that surpass the serial implementation 
                         within the data analysis pipeline by a speed up of 21\% faster than 
                         the traditional model. This first result is part of a more 
                         comprehensive approach, in which all DAP modules that can be 
                         parallelized, are being re-written in GPGP/CUDA, and then tested 
                         and validated within the MSD context.",
                  doi = "10.1016/j.procs.2016.05.375",
                   url = "https://doi.org/10.1016/j.procs.2016.05.375",
                 issn = "1877-0509",
             language = "en",
           targetfile = "vasconcelos_gpu.pdf",
        urlaccessdate = "24 jan. 2021"