Fechar

% PhD thesis record: Lotte (2018), Instituto Nacional de Pesquisas Espaciais (INPE).
% Maintenance notes:
%   - month uses the standard three-letter macro; the day taken from the exported
%     value 2018-08-24 is kept in the separate (non-standard) day field.
%   - Percent signs in the abstract are escaped so the field can be typeset safely.
%   - Acronyms in the title are braced so sentence-casing styles keep their capitals.
%   - The abstract wording is otherwise kept verbatim from the exported record.
%   - NOTE(review): englishtitle holds the Portuguese title while title holds the
%     English one; this looks like an exporter quirk -- confirm before relying on it.
@PhDThesis{Lotte:2018:StDeRe,
               author = "Lotte, Rodolfo Georjute",
                title = "3-dimensional ({3D}) urban mapping: a study of detection and 
                         reconstruction of building's facade through Structure-from-Motion 
                         ({SfM}) and Convolutional Neural Network ({CNN})",
               school = "Instituto Nacional de Pesquisas Espaciais (INPE)",
                 year = "2018",
              address = "S{\~a}o Jos{\'e} dos Campos",
                month = aug,
                  day = "24",
             keywords = "3D urban mapping, facade features, deep-learning, convolutional 
                         neural network, structure-from-motion, mapeamento 3D urbano, 
                         fei{\c{c}}{\~o}es de fachadas, redes neurais convolutivas",
             abstract = "Ambientes urbanos s{\~a}o regi{\~o}es cuja variabilidade 
                         espectral e espacial {\'e} extremamente alta, com uma enorme 
                         variedade de formas e tamanhos que remetem igualmente ao 
                         sensoriamento remoto de alta resolu{\c{c}}{\~a}o em 
                         aplica{\c{c}}{\~o}es envolvendo seus estudos. Devido ao fato de 
                         que esses ambientes podem crescer ainda mais, as 
                         aplica{\c{c}}{\~o}es relacionadas ao seu monitoramento em larga 
                         escala tendem a recorrer a sistemas aut{\^o}nomos que, juntamente 
                         com imagens de alta resolu{\c{c}}{\~a}o, podem ajudar e at{\'e} 
                         predizer situa{\c{c}}{\~o}es cotidianas. Aliado {\`a} 
                         detec{\c{c}}{\~a}o inteligente dessas fei{\c{c}}{\~o}es, 
                         representa{\c{c}}{\~o}es 3D desses ambientes t{\^e}m sido 
                         tamb{\'e}m objeto de estudo ao auxiliar na 
                         investiga{\c{c}}{\~a}o da qualidade ambiental de {\'a}reas 
                         muito densas, padr{\~o}es socioecon{\^o}micos de 
                         ocupa{\c{c}}{\~a}o, na constru{\c{c}}{\~a}o de modelos de 
                         paisagem urbanos, avalia{\c{c}}{\~a}o de efeitos de ilhas de 
                         calor, demoli{\c{c}}{\~o}es de edif{\'{\i}}cios ou 
                         simula{\c{c}}{\~o}es de inunda{\c{c}}{\~o}es para planos de 
                         evacua{\c{c}}{\~a}o e delimita{\c{c}}{\~a}o estrat{\'e}gica, 
                         entre in{\'u}meros outros. Por estes aspectos, o objetivo desta 
                         pesquisa de doutorado foi explorar as vantagens de tais 
                         tecnologias, de forma a apresentar n{\~a}o s{\'o} uma 
                         metodologia autom{\'a}tica para detec{\c{c}}{\~a}o e 
                         reconstru{\c{c}}{\~a}o de elementos urbanos, como tamb{\'e}m 
                         compreender as dificuldades que ainda cercam o mapeamento 
                         autom{\'a}tico desses ambientes. Como objetivos 
                         espec{\'{\i}}ficos: (i) Desenvolver uma rotina de 
                         classifica{\c{c}}{\~a}o autom{\'a}tica de fei{\c{c}}{\~o}es 
                         de fachadas no dom{\'{\i}}nio 2D, utilizando-se de uma Rede 
                         Neural Convolutiva (CNN). (ii) Com as mesmas imagens, obter a 
                         geometria da fachada pelas t{\'e}cnicas de Estrutura por 
                         Movimento (em ingl{\^e}s, Structure-from-Motion (SfM)) e 
                         Est{\'e}reo por Multi-Visadas (em ingl{\^e}s, Multi-View Stereo 
                         (MVS)). (iii) Avaliar o desempenho do modelo neural para 
                         diferentes cen{\'a}rios urbanos e estilos arquitet{\^o}nicos. 
                         (iv) Avaliar o desempenho do modelo neural em uma 
                         aplica{\c{c}}{\~a}o real no Brasil, cuja arquitetura 
                         diferencia-se dos dados utilizados no treinamento do modelo 
                         neural. (v) Classificar o modelo 3D da fachada extra{\'{\i}}da 
                         utilizando-se das imagens segmentadas no dom{\'{\i}}nio 2D pela 
                         t{\'e}cnica de Ray-Tracing (RT). Para tanto, a metodologia do 
                         trabalho foi dividida em an{\'a}lise 2D (detec{\c{c}}{\~a}o) e 
                         3D (reconstru{\c{c}}{\~a}o). De forma que no primeiro, uma CNN 
                         supervisionada {\'e} utilizada para segmentar imagens 
                         {\'o}pticas terrestres de fachadas em seis classes: telhado, 
                         janela, parede, porta, sacada e lojas. Simultaneamente, a fachada 
                         {\'e} reconstru{\'{\i}}da pelo uso do pipeline SfM/MVS, 
                         obtendo-se a geometria da cena. Por fim, os resultados da 
                         segmenta{\c{c}}{\~a}o no dom{\'{\i}}nio 2D, juntamente com 3D, 
                         s{\~a}o ent{\~a}o vinculados pela t{\'e}cnica de RT, obtendo-se 
                         finalmente o modelo 3D classificado. {\'E} demonstrado que a 
                         metodologia proposta {\'e} robusta em rela{\c{c}}{\~a}o a 
                         cen{\'a}rios complexos. As infer{\^e}ncias realizadas com o 
                         modelo neural CNN alcan{\c{c}}ou at{\'e} 93\% de acur{\'a}cia, e 
                         90\% de F1-score para maioria dos conjuntos de dados utilizados. 
                         Para cen{\'a}rios desconhecidos, o modelo neural atingiu 
                         {\'{\i}}ndices de acur{\'a}cia inferiores, justificado pela 
                         elevada diferencia{\c{c}}{\~a}o de estilos arquitet{\^o}nicos. 
                         Contudo, a utiliza{\c{c}}{\~a}o de modelos neurais deep, 
                         d{\~a}o margem {\`a} novas configura{\c{c}}{\~o}es e uso 
                         conjunto com outras arquiteturas deep para a melhoria dos 
                         resultados, sobretudo, aos modelos n{\~a}o-supervisionados. Por 
                         fim, o trabalho demonstrou a capacidade aut{\^o}noma de uma Rede 
                         Neural Convolutiva frente a complexidade dos ambientes urbanos, de 
                         modo a diversificar entre diferentes estilos de fachadas. Embora 
                         haja melhorias a serem realizadas quanto {\`a} 
                         classifica{\c{c}}{\~a}o 3D, a metodologia {\'e} consistente e 
                         permitiu aliar m{\'e}todos de {\'u}ltima gera{\c{c}}{\~a}o na 
                         detec{\c{c}}{\~a}o e reconstru{\c{c}}{\~a}o de fachadas, 
                         al{\'e}m de fornecer suporte {\`a} novos estudos e 
                         proje{\c{c}}{\~o}es sobre cen{\'a}rios ainda mais distintos. 
                         ABSTRACT: Urban environments are regions in which spectral and 
                         spatial variability are extremely high, with a huge range of 
                         shapes and sizes, they also demand high resolution images for 
                         applications involving their study. These environments can grow 
                         over time, applications related to their large-scale monitoring 
                         tend to rely on autonomous intelligent systems that, along with 
                         high-resolution images, can help and even predict everyday 
                         situations. In addition to the detection of these features, 3D 
                         representations of these environments have also been object of 
                         study to assist in the investigation of the environmental quality 
                         of very dense areas, occupational socioeconomic patterns, the 
                         construction of urban landscape models, building demolitions or 
                         flood simulations for evacuation plans and strategic delimitation, 
                         among countless others. The main objective of this study was to 
                         explore the advantages of such technologies, in order to present 
                         an automatic methodology for the detection and reconstruction of 
                         urban elements, and also to understand the difficulties that still 
                         surround the automatic mapping of these environments. Specifically 
                         we aimed: (i) To develop a routine of automatic classification of 
                         facade features in 2D domain, using a Convolutional Neural Network 
                         (CNN); (ii) Using the same images, obtain the facade geometry 
                         using Structure-from-Motion (SfM) and Multi-View Stereo (MVS) 
                         techniques; (iii) Evaluate the performance of the CNN for 
                         different urban scenarios and architectural styles; (iv) Evaluate 
                         the performance of the CNN in a real application in Brazil, whose 
                         architecture differs from the datasets used in the neural model 
                         training; and (v) Classify the 3D model of the extracted facade 
                         using images segmented in 2D domain by the Ray-Tracing (RT) 
                         technique. In order to atempt that, the methodology was splited 
                         into 2D analysis (detection) and 3D (reconstruction). So in the 
                         first, a supervised CNN is used to segment terrestrial optical 
                         images of facades into six classes: roof, window, wall, door, 
                         balcony and shops. At the same time, the facade is reconstructed 
                         using the SfM/MVS technique, obtaining the geometry of the scene. 
                         Finally, the results of segmentation in both domains, 2D and 3D, 
                         are then merged by the Ray-Tracing technique, finally obtaining 
                         the 3D model classified. It is demonstrated that the proposed 
                         methodology is robust toward complex scenarios. The inferences 
                         made with the CNN reached up to 93\% accuracy, and 90\% F1-score for 
                         most of the datasets used. For scenarios not used for training, 
                         the neural model reached lower accuracy indexes, justified by the 
                         high differentiation of architectural styles. However, the use of 
                         deep neural models gives chances for new configurations and use 
                         with other deep architectures to improve results, especially for 
                         unsupervised models. Finally, the work demonstrated the autonomous 
                         capacity of a CNN against the complexity of urban environments, in 
                         order to diversify between different styles of facades. Although 
                         there are improvements to be made regarding 3D classification, the 
                         methodology is consistent and allowed to combine state-of-the-art 
                         methods in the detection and reconstruction of urban elements, as 
                         well as providing support for new studies and projections on even 
                         more distinct scenarios.",
            committee = "K{\"o}rting, Thales Sehn (presidente) and Arag{\~a}o, Luiz 
                         Eduardo Oliveira e Cruz de (orientador) and Shimabukuro, Yosio 
                         Edemir (orientador) and Wagner, Fabien Hubert and Mitishita, Edson 
                         Aparecido and Haala, Norbert and Tommaselli, Ant{\^o}nio Maria 
                         Garcia",
         englishtitle = "Mapeamento urbano tridimensional (3D): um estudo sobre 
                         detec{\c{c}}{\~a}o e reconstru{\c{c}}{\~a}o de fachadas de 
                         edifica{\c{c}}{\~o}es por Estrutura-por-Movimento (SfM) e Redes 
                         Neurais Convolutivas (CNN)",
             language = "en",
                pages = "107",
                  ibi = "8JMKD3MGP3W34R/3RKQRSE",
                  url = "http://urlib.net/ibi/8JMKD3MGP3W34R/3RKQRSE",
           targetfile = "publicacao.pdf",
        urlaccessdate = "19 abr. 2024"
}


Fechar