Fechar

@comment{
  Conference paper record.
  The citation key is kept unchanged, including its non-ASCII letter, so
  existing citations keep resolving; the ids field offers an ASCII alias
  for biblatex/Biber users.
  The conference name, previously stored in organization, now lives in
  booktitle, replacing the former placeholder value there.
  affiliation, conference-year, targetfile and urlaccessdate are
  non-standard fields that the classic BibTeX styles ignore.
}
@inproceedings{SantiagoJúnior:2022:MeExEv,
  author          = {de Santiago J{\'u}nior, Valdivino Alexandre},
  affiliation     = {{Instituto Nacional de Pesquisas Espaciais (INPE)}},
  title           = {A Method and Experiment to Evaluate Deep Neural Networks as
                     Test Oracles for Scientific Software},
  booktitle       = {Proceedings of the {IEEE/ACM} International Conference on
                     Automation of Software Test ({AST})},
  year            = {2022},
  month           = may,
  publisher       = {IEEE},
  keywords        = {Test Oracles, Deep Convolutional Neural Networks, Transfer
                     Learning, Explainable Artificial Intelligence, Data-Centric
                     Artificial Intelligence},
  abstract        = {Testing scientific software is challenging because usually such
                     type of systems have non-deterministic behaviours and, in
                     addition, they generate non-trivial outputs such as images.
                     Artificial intelligence (AI) is now a reality which is also
                     helping in the development of the software testing activity. In
                     this article, we evaluate seven deep neural networks (DNNs),
                     precisely deep convolutional neural networks (CNNs) with up to 161
                     layers, playing the role of test oracle procedures for testing
                     scientific models. Firstly, we propose a method, TOrC, which
                     starts by generating training, validation, and test image datasets
                     via combinatorial interaction testing applied to the original
                     codes and second-order mutants. Within TOrC we also have classical
                     steps such as transfer learning, a technique recommended for DNNs.
                     Then, we verified the performance of the oracles (CNNs). The main
                     conclusions of this research are: i) not necessarily a greater
                     number of layers means that a CNN will present better performance;
                     ii) transfer learning is a valuable technique but eventually we
                     may need extended solutions to get better performances; iii)
                     data-centric AI is an interesting path to follow; and iv) there is
                     not a clear correlation between the software bugs, in the
                     scientific models, and the errors (image misclassifications)
                     presented by the CNNs.},
  conference-year = {16-20 May 2022},
  language        = {en},
  targetfile      = {Paper 2_A Method_Oficial.pdf},
  urlaccessdate   = {29 jun. 2024},
  ids             = {SantiagoJunior:2022:MeExEv},
}


Fechar