author = "Silva J{\'u}nior, Manoel Baptista and Panetta, Jairo and 
                         Stephany, Stephan",
          affiliation = "{Instituto Nacional de Pesquisas Espaciais (INPE)} and {Instituto 
                         Tecnol{\'o}gico de Aeron{\'a}utica (ITA)} and {Instituto 
                         Nacional de Pesquisas Espaciais (INPE)}",
                title = "Portability with efficiency of the advection of {BRAMS} between 
                         multi-core and many-core architectures",
              journal = "Concurrency and Computation: Practice and Experience",
                 year = "2017",
               volume = "29",
               number = "22",
                pages = "e3959",
                month = nov,
             keywords = "atmospheric numerical model, OpenMP, OpenACC, code portability, 
                         multi-core architecture, general purpose graphics processing unit",
             abstract = "The continuous growth of spatial resolution and forecasting period 
                         in current atmospheric models demands increasing processing power 
                         supplied by supercomputers with hundreds or thousands of nodes. 
                         Currently, most of these models are operationally executed on 
                         supercomputers composed of nodes with tens of cores (multi-core 
                         architecture). Newer supercomputer generations have nodes with 
                         multi-core processors coupled to processing accelerators, 
                         typically graphics cards with hundreds of cores (many-core 
                         architecture). The rewriting of model codes to use both 
                         architectures efficiently, that is, executing with or without 
                         graphics cards, represents a challenge because these models have 
                         hundreds of thousands of lines. The OpenMP programming interface 
                         proposed decades ago is a de facto standard that efficiently 
                         explores multi-core architectures. A new programming interface, 
                         OpenACC, is being proposed for many-core architectures. These two 
                         programming interfaces are similar, because they are based on 
                         parallelization directives for the concurrent execution of 
                         threads. This work shows the feasibility of writing a single 
                         portable code embedding both interfaces and presenting acceptable 
                         efficiency when executed on nodes with multi-core or many-core 
                         architecture. The code chosen as a case study is the advection of 
                         scalars, a part of the dynamics of the regional atmospheric model 
                         Brazilian Regional Atmospheric Modeling System (BRAMS). The 
                         dynamics of a model is harder to parallelize because of data 
                         dependencies between adjacent grid points. Single-node executions 
                         of the advections of scalars for different grid sizes using OpenMP 
                         or OpenACC yielded similar speed-ups, showing the feasibility of 
                         the proposed approach.",
                  doi = "10.1002/cpe.3959",
                  url = "https://doi.org/10.1002/cpe.3959",
                 issn = "1532-0626",
             language = "en",
        urlaccessdate = "26 nov. 2020"