% "Fechar" (Portuguese for "Close"): stray web-page button label, not part of the bibliography data.

% Journal article: Watanabe et al. (2020), Information and Software Technology, vol. 128.
% Review notes:
%   - month uses the predefined macro (dec) so each bibliography style formats it itself.
%   - keywords are comma-separated; literal percent signs in the abstract are escaped
%     so that styles which print the abstract do not lose the rest of the line.
%   - urlaccessdate is kept exactly as exported; urldate repeats that date in ISO form.
%   - NOTE(review): the third author parses as given name "Arnaldo Candido" plus family
%     name "Candido Junior" (Candido appears twice) -- confirm against the publisher record.
@Article{WatanabeFeCaSoCaVi:2020:ReEfSo,
               author = "Watanabe, Willian Massami and Felizardo, Katia Romero and Candido
                         J{\'u}nior, Arnaldo Candido and Souza, {\'E}rica Ferreira de and
                         Campos Neto, Jos{\'e} Ede de and Vijaykumar, Nandamudi
                         Lankalapalli",
          affiliation = "{Universidade Tecnol{\'o}gica Federal do Paran{\'a} (UTFPR)} and
                         {Universidade Tecnol{\'o}gica Federal do Paran{\'a} (UTFPR)} and
                         {Universidade Tecnol{\'o}gica Federal do Paran{\'a} (UTFPR)} and
                         {Universidade Tecnol{\'o}gica Federal do Paran{\'a} (UTFPR)} and
                         {Universidade Tecnol{\'o}gica Federal do Paran{\'a} (UTFPR)} and
                         {Instituto Nacional de Pesquisas Espaciais (INPE)}",
                title = "Reducing efforts of software engineering systematic literature
                         reviews updates using text classification",
              journal = "Information and Software Technology",
                 year = "2020",
               volume = "128",
                pages = "e106395",
                month = dec,
             keywords = "Systematic literature review, SLR, Automatic selection, Review
                         update, Text classification, Document classification, Text
                         categorization",
             abstract = "Context: Systematic Literature Reviews (SLRs) are frequently used
                         to synthesize evidence in Software Engineering (SE), however
                         replicating and keeping SLRs up-to-date is a major challenge. The
                         activity of studies selection in SLR is labor intensive due to the
                         large number of studies that must be analyzed. Different
                         approaches have been investigated to support SLR processes, such
                         as: Visual Text Mining or Text Classification. But acquiring the
                         initial dataset is time-consuming and labor intensive. Objective:
                         In this work, we proposed and evaluated the use of Text
                         Classification to support the studies selection activity of new
                         evidences to update SLRs in SE. Method: We applied Text
                         Classification techniques to investigate how effective and how
                         much effort could be spared during the studies selection phase of
                         an SLR update. Considering the SLRs update scenario, the studies
                         analyzed in the primary SLR could be used as a classified dataset
                         to train Supervised Machine Learning algorithms. We conducted an
                         experiment with 8 Software Engineering SLRs. In the experiments,
                         we investigated the use of multiple preprocessing and feature
                         extraction tasks such as tokenization, stop words removal, word
                         lemmatization, TF-IDF (Term-Frequency/Inverse-Document-Frequency)
                         with Decision Tree and Support Vector Machines as classification
                         algorithms. Furthermore, we configured the classifier activation
                         threshold for maximizing Recall, hence reducing the number of
                         Missed selected studies. Results: The techniques accuracies were
                         measured and the results achieved on average a F-Score of 0.92 and
                         62\% of exclusion rate when varying the activation threshold of the
                         classifiers, with a 4\% average number of Missed selected studies.
                         Both the Exclusion rate and number of Missed selected studies were
                         significantly different when compared to classifier which did not
                         use the configuration of the activation threshold. Conclusion: The
                         results showed the potential of the techniques in reducing the
                         effort required of SLRs updates.",
                  doi = "10.1016/j.infsof.2020.106395",
                  url = "https://doi.org/10.1016/j.infsof.2020.106395",
                 issn = "0950-5849",
             language = "en",
           targetfile = "watanabe_reducing.pdf",
        urlaccessdate = "25 abr. 2024",
              urldate = "2024-04-25"
}


% "Fechar" (Portuguese for "Close"): stray web-page button label, not part of the bibliography data.