% PhD thesis record: Marilyn Minicucci Iba{\~n}ez, Instituto Nacional de
% Pesquisas Espaciais (INPE), 2021, written in Portuguese (language = pt).
% The abstract field holds the Portuguese abstract followed by its English
% version, introduced by the literal marker "ABSTRACT:".
% day, committee, englishtitle, ibi, targetfile and urlaccessdate are not
% standard BibTeX fields; standard styles silently ignore unknown fields.
% NOTE(review): the citation key contains non-ASCII characters (n-tilde and
% i-acute), which classic BibTeX toolchains may not accept. It is left
% unchanged because documents cite this entry by that key.
@PhDThesis{Ibañez:2021:AnEmMí,
author = "Iba{\~n}ez, Marilyn Minicucci",
title = "An{\'a}lise de emo{\c{c}}{\~o}es em m{\'{\i}}dias sociais
utilizando aprendizado de m{\'a}quina e s{\'e}ries temporais
considerando informa{\c{c}}{\~o}es de eventos extremos sociais e
naturais",
school = "Instituto Nacional de Pesquisas Espaciais (INPE)",
year = "2021",
address = "S{\~a}o Jos{\'e} dos Campos",
month = apr,
day = "15",
keywords = "eventos extremos, m{\'{\i}}dias sociais, an{\'a}lise de
sentimento, s{\'e}ries temporais, aprendizado de m{\'a}quina,
extreme events, social media, sentiment analysis, time series,
machine learning",
abstract = "Nas {\'u}ltimas d{\'e}cadas, o crescimento do uso da Internet
gerou um aumento substancial na circula{\c{c}}{\~a}o de
informa{\c{c}}{\~o}es nas redes sociais. Devido ao grande
interesse de diversas {\'a}reas da sociedade na an{\'a}lise de
dados de redes sociais, estabeleceu-se a busca por melhores
t{\'e}cnicas para a manipula{\c{c}}{\~a}o e compreens{\~a}o
desse conte{\'u}do, permitindo que este enorme volume de
informa{\c{c}}{\~o}es possa ser interpretado de forma
r{\'a}pida e precisa. Dentro da grande variedade de
informa{\c{c}}{\~o}es que circulam na internet, a
ocorr{\^e}ncia de eventos extremos pode ser considerada uma
{\'a}rea de grande interesse p{\'u}blico devido a sua grande
influ{\^e}ncia direta na sociedade. Assim, compreender a
eclos{\~a}o desses eventos extremos continua sendo um dos grandes
desafios cient{\'{\i}}ficos contempor{\^a}neos, cujo progresso
depende fortemente de abordagens multidisciplinares. Assim, nesta
disserta{\c{c}}{\~a}o, s{\~a}o analisados dados coletados em
m{\'{\i}}dias sociais, de grande circula{\c{c}}{\~a}o nacional
e mundial, relacionados a eventos sociais e naturais extremos, a
fim de identificar a emo{\c{c}}{\~a}o de amea{\c{c}}a definida
para cada tema do evento abordado. Como estudos de caso, foram
considerados dados sobre eventos sociais extremos relacionados a
conflitos armados, entre os pa{\'{\i}}ses S{\'{\i}}ria e EUA,
Ir{\~a} e EUA e Global (considerando os pa{\'{\i}}ses China,
{\'I}ndia, Paquist{\~a}o, Reino Unido, Jap{\~a}o, EUA,
Coreia do Norte, Coreia do Sul, Taiwan e Indon{\'e}sia). Em
eventos naturais extremos foram selecionados dados relativos
{\`a} ocorr{\^e}ncia de secas, inc{\^e}ndios e desmatamentos na
regi{\~a}o da Floresta Amaz{\^o}nica para os anos de 2015, 2016,
2017, 2018, 2019 e 2020. A coleta dessas informa{\c{c}}{\~o}es
foi realizada considerando a evolu{\c{c}}{\~a}o crescente de
eventos, buscando entender como as amea{\c{c}}as ao longo do
tempo podem gerar uma evolu{\c{c}}{\~a}o end{\'o}gena
resultando em um evento extremo. O processamento dessas
informa{\c{c}}{\~o}es {\'e} realizado por meio da t{\'e}cnica
de An{\'a}lise de Sentimentos, para identificar o grau de
amea{\c{c}}a de cada not{\'{\i}}cia coletada. O endere{\c{c}}o
eletr{\^o}nico das not{\'{\i}}cias coletadas {\'e} armazenado
em arquivo .csv juntamente com as informa{\c{c}}{\~o}es sobre a
data de publica{\c{c}}{\~a}o e o grau de amea{\c{c}}a, que
formam um portf{\'o}lio de amea{\c{c}}as para cada modelo de
dados abordado. Os portf{\'o}lios foram utilizados para validar o
algoritmo P-Model como gerador de s{\'e}ries temporais
end{\'o}genas para eventos extremos. O resultado desta
valida{\c{c}}{\~a}o {\'e} a gera{\c{c}}{\~a}o de s{\'e}ries
temporais de amea{\c{c}}as end{\'o}genas, que s{\~a}o
utilizadas para prever a varia{\c{c}}{\~a}o de amea{\c{c}}a
futura dos eventos sociais e naturais extremos analisados. Para
realizar a predi{\c{c}}{\~a}o de s{\'e}ries temporais
end{\'o}genas, utiliza-se a t{\'e}cnica de Deep Learning em uma
estrutura da rede que aplica a rede neural Long-Short Term Memory
- LSTM. Os resultados alcan{\c{c}}ados com base no LSTM,
mostraram uma acur{\'a}cia entre 46\% e 71\% na previs{\~a}o do
padr{\~a}o de flutua{\c{c}}{\~a}o interpretado como
amea{\c{c}}as, quando considerados os dados coletados para os
dois estudos de caso abordados. ABSTRACT: In the last decades, the
growth of Internet access has generated a substantial increase in
the circulation of information on social networks. Due to the
great interest of several areas of society in the analysis of
social network data, the search for better techniques for the
manipulation and understanding of this content has been
established, allowing this huge volume of information to be
interpreted quickly and accurately. Within the wide variety of
information circulating on the internet, the occurrence of extreme
events can be considered an area of great public interest due to
their great direct influence on society. Thus, understanding the
outbreak of these extreme events remains one of the great
contemporary scientific challenges, whose progress depends heavily
on multidisciplinary approaches. Thus, in this thesis, data
collected from social media, of great national and worldwide
circulation, related to extreme social and natural events are
analyzed in order to identify the emotion of defined threat for
each event theme addressed. As case studies, data on extreme
social events related to armed conflicts were considered, between
the countries Syria and USA, Iran and USA and Global (considering
the countries China, India, Pakistan, United Kingdom, Japan, USA,
North Korea, South Korea, Taiwan and Indonesia). On extreme
natural events were selected data related to the occurrence of
drought, fires and deforestation in the Amazon Forest region for
the years 2015, 2016, 2017, 2018, 2019 and 2020. The collection of
this information was carried out considering the increasing
evolution of events, searching to understand how threats along
time can generate an endogenous evolution resulting in an extreme
event. The processing of this information is performed using the
technique of Sentiment Analysis, to identify the degree of threat
of each news collected. The electronic address of the news
collected is stored in a .csv file together with the information
on the date of publication and the degree of threat, which form a
threat portfolio for each data model addressed. The portfolios
were used to validate the algorithm P-Model as a generator of
endogenous time series for extreme events. The result of this
validation is the generation of endogenous threat time series,
which are used to predict the future threat variation of the
analyzed extreme social and natural events. To perform the
prediction of endogenous time series, the Deep Learning technique
is used in one structure of the network that applies the neural
network Long-Short Term Memory -- LSTM. The results
achieved based on the LSTM, showed an accuracy between 46\% and 71\%
in the prediction of the fluctuation pattern interpreted as
threats, when considering the data collected for the two case
studies addressed.",
committee = "Campos Velho, Haroldo Fraga de (presidente) and Guimar{\~a}es,
Lamartine Nogueira Frutuoso (orientador) and Rosa, Reinaldo
Roberto (orientador) and Shiguemori, Elcio Hideiti and Barchi,
Paulo Henrique and Almeida Junior, Jurandy Gomes de and Caetano,
Marco Antonio Leonel",
englishtitle = "Analysis of emotions in social media using machine learning and
time series considering information from extreme social and
natural events",
language = "pt",
pages = "192",
ibi = "8JMKD3MGP3W34R/44H7S82",
url = "http://urlib.net/ibi/8JMKD3MGP3W34R/44H7S82",
targetfile = "publicacao.pdf",
urlaccessdate = "26 abr. 2024"
}