% PhD thesis record (school: INPE). The abstract field carries the Portuguese
% abstract followed by its English version, introduced by the "ABSTRACT:" marker.
% Fields outside the standard BibTeX set (keywords, abstract, committee,
% englishtitle, language, ibi, targetfile, urlaccessdate, urldate,
% internal-note) are silently ignored by styles that do not know them.
% NOTE(review): the exported record carried a full ISO date in the month field;
% it is kept in internal-note below instead, because it is not a month and it
% disagrees with the year field. Confirm which date the bibliography should show.
@PhDThesis{Rodrigues:2024:MaLeHa,
  author        = {Rodrigues, Marcos Lima},
  title         = {Machine learning e hashing para identifica{\c{c}}{\~a}o de
                   imagens de sensoriamento remoto baseada em conte{\'u}do},
  school        = {Instituto Nacional de Pesquisas Espaciais (INPE)},
  year          = {2024},
  address       = {S{\~a}o Jos{\'e} dos Campos},
  internal-note = {Source export had month = 2023-12-15 (a full ISO date,
                   presumably the defense date; it conflicts with year 2024 --
                   confirm before restoring a month)},
  keywords      = {recupera{\c{c}}{\~a}o de imagens baseada em conte{\'u}do, redes
                   neurais convolucionais, EuroSAT, uso e cobertura da terra,
                   Cerrado, content-based image retrieval (CBIR), deep hashing neural
                   network (DHNN), land use and land cover (LULC), the
                   Brazilian savanna (Cerrado)},
  abstract      = {Neste trabalho {\'e} apresentado o desenvolvimento de uma
                   solu{\c{c}}{\~a}o (framework) para busca e
                   recupera{\c{c}}{\~a}o de imagens de sat{\'e}lite baseadas em
                   conte{\'u}do, com potencial para aplica{\c{c}}{\~a}o no escopo
                   de grandes conjuntos de dados. A {\'a}rea de sensoriamento remoto
                   (SR) para observa{\c{c}}{\~a}o da Terra tem experimentado um
                   grande desenvolvimento na {\'u}ltima d{\'e}cada, dando origem {\`a}
                   era do Remote Sensing Big Data (RSBD), tornando desafiadora a
                   tarefa de recuperar imagens {\'u}teis nesse grande volume de
                   dados, por exemplo, que possam ser usadas para estudos de uso e
                   cobertura da terra no Cerrado brasileiro. Nesse contexto, o
                   desenvolvimento de sistemas baseados em Content-Based Image
                   Retrieval (CBIR) apoiado por m{\'e}todos de Deep Learning como as
                   Convolutional Neural Networks (CNNs), t{\^e}m sido empregados com
                   sucesso a dados multifontes e multiespectrais (MS). As
                   arquiteturas Deep Hashing Neural Networks (DHNNs) empregam CNNs
                   para extra{\c{c}}{\~a}o de atributos de imagens e convers{\~a}o
                   desses atributos em c{\'o}digos bin{\'a}rios (hash codes) para
                   cria{\c{c}}{\~a}o de um espa{\c{c}}o m{\'e}trico otimizado
                   para CBIR no escopo do RSBD. A Metric-Learning-Based Deep Hashing
                   Network (MiLaN) representa o estado da arte desse tipo de
                   arquitetura, baseada na combina{\c{c}}{\~a}o de tr{\^e}s
                   fun{\c{c}}{\~o}es de perda que permitem o aprendizado de um
                   espa{\c{c}}o m{\'e}trico ideal para a recupera{\c{c}}{\~a}o de
                   imagens baseada em conte{\'u}do (Semantic-Based Metric Space).
                   Originalmente a rede MiLaN adotou como m{\'o}dulo de
                   extra{\c{c}}{\~a}o de caracter{\'{\i}}sticas das imagens
                   (backbone) a rede Inception V3 pr{\'e}-treinada com dados fora do
                   dom{\'{\i}}nio do SR (ImageNet), isso implica em
                   limita{\c{c}}{\~o}es devido a diferen{\c{c}}as t{\'{\i}}picas
                   entre as imagens como a resolu{\c{c}}{\~a}o espacial e
                   influ{\^e}ncia da atmosfera nas imagens orbitais. O framework
                   proposto possibilitou avan{\c{c}}os em rela{\c{c}}{\~a}o {\`a}
                   abordagem original da MiLaN ao adotar um novo backbone baseado na
                   ResNet-50 e realizar o processo de ajuste dessas arquiteturas
                   (MiLaN+ResNet-50) atrav{\'e}s do fine-tuning baseado em imagens
                   satelitais MS. Esta afirma{\c{c}}{\~a}o {\'e} evidenciada pelos
                   resultados expressivos alcan{\c{c}}ados para tarefa CBIR medidos
                   atrav{\'e}s da m{\'e}trica mean Average Precision - mAP, o
                   desempenho global baseado nas 100 primeiras imagens recuperadas
                   (mAP@100) foi de 99,8873\% para o conjunto EuroSAT MS (Sentinel 2 -
                   13 bandas). De maneira particular foi demonstrado que os dados MS
                   fornecem informa{\c{c}}{\~o}es sem{\^a}nticas de qualidade
                   durante o processo de extra{\c{c}}{\~a}o de
                   caracter{\'{\i}}sticas usando a ResNet-50, contribuindo assim
                   para corre{\c{c}}{\~a}o de erros em rela{\c{c}}{\~a}o {\`a}
                   discrimina{\c{c}}{\~a}o de imagens que apresentam padr{\~o}es
                   geom{\'e}tricos ({\'A}reas Industriais/Residenciais) e de
                   textura (Floresta, Pastagem e Culturas Permanentes) similares
                   quando utilizado somente as bandas RGB das imagens de m{\'e}dia
                   resolu{\c{c}}{\~a}o do conjunto EuroSAT. O desempenho para o
                   conjunto EuroSAT MS superou o apresentado por outros m{\'e}todos
                   do estado da arte para realiza{\c{c}}{\~a}o de CBIR, inclusive
                   utilizando imagens a{\'e}reas de alta resolu{\c{c}}{\~a}o
                   espacial do conjunto Aerial Image Dataset (AID). ABSTRACT: This
                   work presents the development of a framework for searching and
                   retrieving content-based satellite images, with potential for
                   application in the scope of large datasets. The area of remote
                   sensing (RS) for Earth observation has experienced great
                   development in the last decade, giving rise to the era of Remote
                   Sensing Big Data (RSBD), making the task of retrieving useful
                   images from this large volume of data challenging, for example,
                   that can be used for studies of land use and land cover in the
                   Brazilian Cerrado. In this context, the development of systems
                   based on Content-Based Image Retrieval (CBIR) supported by Deep
                   Learning methods such as Convolutional Neural Networks (CNNs),
                   have been successfully applied to multisource and multispectral
                   (MS) data. Deep Hashing Neural Networks (DHNNs) architectures
                   employ CNNs to extract image attributes and convert these
                   attributes into binary codes (hash codes) to create a metric space
                   optimized for CBIR within the scope of RSBD. The
                   Metric-Learning-Based Deep Hashing Network (MiLaN) represents the
                   state of the art of this type of architecture, based on the
                   combination of three loss functions that allow the learning of an
                   ideal metric space for CBIR (Semantic-Based Metric Space).
                   Originally, the MiLaN network adopted the Inception V3 network
                   pre-trained with data outside the RS domain (ImageNet) as an image
                   feature extraction module (backbone), this implies limitations due
                   to typical differences between images such as the spatial
                   resolution and influence of the atmosphere on orbital images. The
                   proposed framework enabled advances in the original MiLaN approach
                   by adopting a new backbone based on ResNet-50 and carrying out the
                   adjustment process of these architectures (MiLaN+ResNet-50)
                   through fine-tuning based on MS satellite images. This statement
                   is evidenced by the expressive results achieved for the CBIR task
                   measured using the mean Average Precision (mAP) metric, the global
                   performance based on the top-100 recovered images (mAP@100) was
                   99.8873\% for the set EuroSAT MS (Sentinel 2 - 13 bands). In
                   particular, it was demonstrated that MS data provides quality
                   semantic information during the feature extraction process using
                   ResNet-50, thus contributing to error correction concerning the
                   discrimination of images that present geometric patterns
                   (Industrial/Residential Areas) and texture (Forest, Pasture and
                   Permanent Crops) similar when using only the RGB bands of the
                   medium resolution images from the EuroSAT set. The performance for
                   the EuroSAT MS dataset surpassed that presented by other
                   state-of-the-art methods for carrying out CBIR, including using
                   high spatial resolution aerial images from the Aerial Image
                   Dataset (AID).},
  committee     = {Gomes, Karine Reis Ferreira (presidente) and K{\"o}rting, Thales
                   Sehn (orientador) and Queiroz, Gilberto Ribeiro de (orientador)
                   and Negri, Rog{\'e}rio Galante and Noma, Alexandre},
  englishtitle  = {Machine learning and hashing for content-based image retrieval
                   (CBIR) of remote sensing images},
  language      = {pt},
  pages         = {106},
  ibi           = {8JMKD3MGP3W34T/4ADRCA2},
  url           = {http://urlib.net/ibi/8JMKD3MGP3W34T/4ADRCA2},
  targetfile    = {publicacao.pdf},
  urlaccessdate = {19 maio 2024},
  urldate       = {2024-05-19}
}