diff --git a/DESCRIPTION b/DESCRIPTION
index 6c7029d..9da2d12 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: geocodebr
 Title: Geolocalização De Endereços Brasileiros (Geocoding Brazilian Addresses)
-Version: 0.6.2
+Version: 0.6.2.9000
 Authors@R: c(
     person("Rafael H. M.", "Pereira", , "rafa.pereira.br@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-2125-7465")),
@@ -42,7 +42,6 @@ Imports:
     duckspatial (>= 1.0.0),
     enderecobr (>= 0.5.0),
     fs,
-    geoarrow (>= 0.4.2),
     glue,
     h3r,
     httr2 (>= 1.0.0),
@@ -68,4 +67,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 Language: pt
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.3
+Config/roxygen2/version: 8.0.0
diff --git a/NEWS.md b/NEWS.md
index bb9dd5e..ab62d13 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,20 @@
+# geocodebr v0.6.3 dev
+
+## Correção de bugs (Bug fixes)
+
+- Bug corrigido que agora permite usuários passarem como input tabelas de 
+endereços com apenas alguns campos. Os campos de município e unidade da federação
+continuam sendo obrigatórios. Encerra [#89](https://github.com/ipeaGIT/geocodebr/issues/89)
+e [#94](https://github.com/ipeaGIT/geocodebr/issues/94)
+
+## Mudanças pequenas (Minor changes)
+
+- A função `geocode_reverso()` teve pequeno ganho de velocidade, com drástica 
+redução no consumo de memória. Na amostra de 1000 pontos, o uso de memória caiu
+de 161MB para 95MB.
+
+
+
 # geocodebr v0.6.2
 
 ## Correção de bugs (Bug fixes)
@@ -7,7 +24,7 @@ data release corrente, e ignora eventuais dados de releases antigos que estejam
 na pasta. [Encerra #90](https://github.com/ipeaGIT/geocodebr/issues/90)
 - A função `geocode()` agora retorna erro informativo quando alguma coluna na tabela 
 de input tem nome com algum caractere não alfanumérico, como . , ? ^ - ! ~. Não 
-há problema com o sublinhado _, como em “name_muni”. Fecha [issue #92](https://github.com/ipeaGIT/geocodebr/issues/92)
+há problema com a barra baixa _, como em “name_muni”. Fecha [issue #92](https://github.com/ipeaGIT/geocodebr/issues/92)
 - Corrigido erro na função de `geocode_reverso()` que impedia usar valores muito
 altos de `dist_max`. [Encerra #88](https://github.com/ipeaGIT/geocodebr/issues/88)
 - Incluido 'Language: pt' na DESCRIPTION
diff --git a/R/geocode.R b/R/geocode.R
index ef24773..be44759 100644
--- a/R/geocode.R
+++ b/R/geocode.R
@@ -210,6 +210,30 @@ geocode_core <- function(
   # systime start 66666 ----------------
   # timer$mark("Start")
 
+  # handle missing fields in input data -------------------------------------------------------
+  # geocodebr requires all address fields to be declared
+  # if one or more fields were not declared, we add mock columns filled with NA
+
+  campos_endereco <- assert_and_assign_address_fields(
+    campos_endereco,
+    enderecos
+  )
+
+  # determine which columns are missing, if any
+  missing_cols <- campos_endereco[unlist(lapply(campos_endereco, is.null))]
+
+  if (length(missing_cols)>=1) {
+
+    # add NA-filled placeholder columns for the missing fields
+    data.table::setDT(enderecos)
+    new_colnames <- paste0(names(missing_cols), "tempgeocodebr")
+    enderecos[, (new_colnames) := NA_character_ ]
+
+    # update address fields with fake columns
+    campos_endereco[sapply(campos_endereco, is.null)] <- as.list(new_colnames)
+  }
+
+
   # normalize input data -------------------------------------------------------
   # standardizing the addresses table to increase the chances of finding a match
   # in the CNEFE data
@@ -219,11 +243,6 @@ geocode_core <- function(
       message_standardizing_addresses()
     }
 
-    campos_endereco <- assert_and_assign_address_fields(
-      campos_endereco,
-      enderecos
-    )
-
     input_padrao <- enderecobr::padronizar_enderecos(
       enderecos = enderecos,
       campos_do_endereco = enderecobr::correspondencia_campos(
@@ -487,6 +506,21 @@ geocode_core <- function(
   # drop geocodebr temp id column
   output_df[, tempidgeocodebr := NULL]
 
+  # # col precisao como ordered factor
+  # ordem_precisao <- c(
+  #   "numero",
+  #   "numero_aproximado",
+  #   "logradouro",
+  #   "cep",
+  #   "localidade",
+  #   "municipio"
+  # )
+  # output_df[, precisao := factor(
+  #   precisao,
+  #   levels = ordem_precisao,
+  #   ordered = TRUE
+  # )]
+
   # Disconnect from DuckDB when done
   duckdb::dbDisconnect(con)
 
@@ -508,6 +542,11 @@ geocode_core <- function(
     # timer$mark("Add H3")
   }
 
+  # drop the temporary NA-filled mock columns, if any were added
+  if (length(missing_cols)>=1) {
+    output_df[, (new_colnames) := NULL]
+  }
+
   # remove data.table class
   data.table::setindex(output_df, NULL)
   data.table::setDF(output_df)
diff --git a/R/geocode_reverso.R b/R/geocode_reverso.R
index b82a58d..14bbb90 100644
--- a/R/geocode_reverso.R
+++ b/R/geocode_reverso.R
@@ -67,9 +67,6 @@ geocode_reverso <- function(
     )
   }
 
-  # pontos <- sf::st_transform(pontos, 4674)
-
-
   # prep input -------------------------------------------------------
 
   # converte pontos de input para data.frame
@@ -114,24 +111,8 @@ geocode_reverso <- function(
   # limita escopo de busca aos municipios  -------------------------------------------------------
   # determine potential municipalities
   munis <- system.file("extdata/munis_bbox_2022.parquet", package = "geocodebr") |>
-    arrow::open_dataset() |>
-    sf::st_as_sf()
-
-  # place holder to use geoarrow becaue:
-  #   Namespace in Imports field not imported from: 'geoarrow'
-  #        All declared Imports should be used.
-  geoarrow::as_geoarrow_vctr("POINT (0 1)")
-
-  # munis_path <- system.file("extdata/munis_2022.parquet", package = "geocodebr")
-  #
-  # query_register_muni <- glue::glue(
-  #   "CREATE OR REPLACE TEMP VIEW munis AS
-  #       SELECT *,
-  #       geometry::GEOMETRY AS geometry
-  #   FROM read_parquet('{munis_path}');"
-  # )
-  #
-  # DBI::dbExecute(conn, query_register_muni)
+    duckspatial::ddbs_open_dataset()
+
 
   potential_munis <- duckspatial::ddbs_join(
     x = pontos,
@@ -185,22 +166,28 @@ geocode_reverso <- function(
   # ST_Point(lon, lat)::GEOMETRY('EPSG:4674') AS geom
 
 
-  cnefe_utm_duck <-  duckspatial::ddbs_transform(
+  # converte cnefe para UTM
+  cnefe_utm_duck <- duckspatial::ddbs_transform(
     x = 'cnefe_tb',
-    y = 'EPSG:31983',conn = conn,
+    y = 'EPSG:31983',
+    conn = conn,
     quiet = TRUE
   )
 
-  # input to UTM
-  input_utm_duck <-  duckspatial::ddbs_transform(
+  # converte pontos para UTM
+  input_utm_duck <- duckspatial::ddbs_transform(
     x = pontos,
     y = 'EPSG:31983',
+    conn = conn,
+    name = "pontos_utm",
+    overwrite = T,
     quiet = TRUE
   )
 
-  # buffers around input points
+  # buffer around input points
   buff <- duckspatial::ddbs_buffer(
-    x = input_utm_duck,
+    x = "pontos_utm",
+    conn = conn,
     distance = dist_max,
     quiet = TRUE
   )
@@ -210,30 +197,14 @@ geocode_reverso <- function(
     result <- duckspatial::ddbs_join(
       x = cnefe_utm_duck,
       y = buff,
-      join = "within",
+      join = "intersects", # intersects within
+      conn = conn,
+      name = "join_result",
+      overwrite = T,
       quiet = TRUE
     )
   )
 
-  # write to connection
-  duckspatial::ddbs_write_table(
-    conn = conn,
-    data = input_utm_duck,
-    name = "pontos_utm",
-    overwrite = T,
-    temp_view = T,
-    quiet = TRUE
-  )
-
-  duckspatial::ddbs_write_table(
-    conn = conn,
-    data = result,
-    name = "join_result",
-    overwrite = T,
-    temp_view = T,
-    quiet = TRUE
-  )
-
   # Get column names from both tables
   cols_a <- DBI::dbGetQuery(conn, "SELECT column_name FROM (DESCRIBE pontos_utm)")$column_name
   cols_b <- DBI::dbGetQuery(conn, "SELECT column_name FROM (DESCRIBE join_result)")$column_name
@@ -256,7 +227,7 @@ geocode_reverso <- function(
         ST_Distance(a.geometry, b.geometry) AS distancia_metros,
         ROW_NUMBER() OVER (
           PARTITION BY a.id
-          ORDER BY ST_Distance(a.geometry, b.geometry)
+          ORDER BY distancia_metros
         ) AS rn
       FROM pontos_utm AS a
       JOIN join_result AS b
diff --git a/R/onLoad.R b/R/onLoad.R
deleted file mode 100644
index 08c8fba..0000000
--- a/R/onLoad.R
+++ /dev/null
@@ -1,5 +0,0 @@
-.onLoad <- function(libname, pkgname){ # nocov start
-
-  loadNamespace("geoarrow")
-
-} # nocov end
diff --git a/cran-comments.md b/cran-comments.md
index 7bcd358..6f540c5 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,21 +1,22 @@
 ## R CMD check results
 
-── R CMD check results ────────────────────────────────────────────── geocodebr 0.6.2 ────
-Duration: 2m 33.3s
+── R CMD check results ───────────────────────────────────────────── geocodebr 0.6.3 ────
+Duration: 2m 39s
 
 0 errors ✔ | 0 warnings ✔ | 0 notes ✔
 
-# geocodebr v0.6.2
 
-## Correção de bugs (Bug fixes)
+# geocodebr v0.6.3
 
-- Fixed a bug to ensure that the package uses only cached data from the 
-current release and ignores any data from older releases that may be 
-in the folder. [Closes #90](https://github.com/ipeaGIT/geocodebr/issues/90)
-- The `geocode()` function now returns an informational error when a column in the 
-input table has a name containing a non-alphanumeric character, such as . , ? ^ - ! ~. There 
-is no issue with the underscore _, as in “name_muni”. Closed [issue #92](https://github.com/ipeaGIT/geocodebr/issues/92)
-- Fixed a bug in the `geocode_reverso()` function that prevented the use of very
-high values for `dist_max`. [Closes #88](https://github.com/ipeaGIT/geocodebr/issues/88)
-- Added ‘Language: pt’ to DESCRIPTION
+## Bug fixes
 
+- Fixed a bug that prevented users from passing address tables containing only a 
+subset of address fields as input. Municipality and state fields remain 
+mandatory. Closes [#89](https://github.com/ipeaGIT/geocodebr/issues/89)
+and [#94](https://github.com/ipeaGIT/geocodebr/issues/94)
+
+## Minor changes
+
+- The `geocode_reverso()` function achieved a small speed improvement, along 
+with a substantial reduction in memory usage. In a sample of 1,000 points, 
+memory consumption dropped from 161MB to 95MB.
diff --git a/inst/extdata/large_sample.parquet b/inst/extdata/large_sample.parquet
index 7709723..70993af 100644
Binary files a/inst/extdata/large_sample.parquet and b/inst/extdata/large_sample.parquet differ
diff --git a/inst/extdata/munis_bbox_2022.parquet b/inst/extdata/munis_bbox_2022.parquet
index c83c2d6..3f65f94 100644
Binary files a/inst/extdata/munis_bbox_2022.parquet and b/inst/extdata/munis_bbox_2022.parquet differ
diff --git a/man/definir_pasta_cache.Rd b/man/definir_pasta_cache.Rd
index 68c89ab..42eb8f6 100644
--- a/man/definir_pasta_cache.Rd
+++ b/man/definir_pasta_cache.Rd
@@ -9,7 +9,7 @@ definir_pasta_cache(path, verboso = TRUE)
 \arguments{
 \item{path}{Uma string. O caminho para o diretório usado para armazenar os
 dados em cache.  Se \code{NULL}, o pacote usará um diretório versionado salvo
-dentro do diretório retornado por \code{\link[tools:userdir]{tools::R_user_dir()}}.}
+dentro do diretório retornado por \code{\link[tools:R_user_dir]{tools::R_user_dir()}}.}
 
 \item{verboso}{Um valor lógico. Indica se barras de progresso e mensagens
 devem ser exibidas durante o download dos dados do CNEFE e a geocodificação
diff --git a/man/geocodebr.Rd b/man/geocodebr.Rd
index 175aae4..deff2d2 100644
--- a/man/geocodebr.Rd
+++ b/man/geocodebr.Rd
@@ -7,7 +7,7 @@
 \title{Package: geocodebr: Geolocalização De Endereços Brasileiros (Geocoding
 Brazilian Addresses)}
 \description{
-\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
+\if{html}{\figure{logo.svg}{options: style='float: right' alt='logo' width='120'}}
 
 Método simples e eficiente de geolocalizar dados no Brasil. O pacote é baseado em conjuntos de dados espaciais abertos de endereços brasileiros, utilizando como fonte principal o Cadastro Nacional de Endereços para Fins Estatísticos (CNEFE). O CNEFE é publicado pelo Instituto Brasileiro de Geografia e Estatística (IBGE), órgão oficial de estatísticas e geografia do Brasil. (A simple and efficient method for geolocating data in Brazil. The package is based on open spatial datasets of Brazilian addresses, primarily using the Cadastro Nacional de Endereços para Fins Estatísticos (CNEFE), published by the Instituto Brasileiro de Geografia e Estatística (IBGE), Brazil's official statistics and geography agency.)
 }
@@ -25,6 +25,7 @@ Useful links:
 
 Authors:
 \itemize{
+  \item Rafael H. M. Pereira \email{rafa.pereira.br@gmail.com} (\href{https://orcid.org/0000-0003-2125-7465}{ORCID})
   \item Daniel Herszenhut \email{dhersz@gmail.com} (\href{https://orcid.org/0000-0001-8066-1105}{ORCID})
   \item Gabriel Garcia de Almeida (\href{https://orcid.org/0009-0003-3557-7328}{ORCID})
 }
diff --git a/tests/tests_rafa/benchmark_20k.R b/tests/tests_rafa/benchmark_20k.R
index 9d5e7fd..b467768 100644
--- a/tests/tests_rafa/benchmark_20k.R
+++ b/tests/tests_rafa/benchmark_20k.R
@@ -9,14 +9,16 @@ ncores <- 7
 
 
 campos <- geocodebr::definir_campos(
-  logradouro = 'logradouro',
-  numero = 'numero',
+  # logradouro = 'logradouro',
+  # numero = 'numero',
   cep = 'cep',
   localidade = 'bairro',
   municipio = 'municipio',
   estado = 'uf'
 )
 
+input_df$logradouro <- NULL
+input_df$numero <- NULL
 
 bench::mark(iterations = 3,
   a <- geocodebr::geocode(
@@ -34,5 +36,8 @@ bench::mark(iterations = 3,
 #          expression    min median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result memory
 # streetmap 0.6.0 dev  7.10s  7.26s     0.136    5.47MB        0     5     0      36.7s <df>   <Rprofmem>
 # laptop    0.6.0 CRAN  5.2s  5.53s     0.174    7.46MB        0     5     0      28.8s <df>
-# load 1 a <- geoc…  8.1s  8.79s     0.116    3.03MB        0     3     0        26s
-# sem 1 a <- geoc… 10.3s  10.5s    0.0944    3.03MB        0     3     0      31.8s
+#1              ""    8.67s  8.86s     0.113    2.04MB   0.0565     2     1      17.7s <df>
+#1              ""    8.35s  8.82s     0.115    5.43MB        0     3     0      26.1s <df>
+#1        "NA_int"    6.52s  6.58s     0.152    4.18MB   0.0760     2     1      13.2s <df>
+#1        "NA_int"    6.54s  6.81s     0.147    1.73MB   0.0734     2     1      13.6s <df>
+#1 a <- geocodebr::ge… 7.58s  7.72s     0.124    4.18MB        0     3     0      24.1s <df>
diff --git a/tests/tests_rafa/generate_sample_data.R b/tests/tests_rafa/generate_sample_data.R
index b07906a..032bd12 100644
--- a/tests/tests_rafa/generate_sample_data.R
+++ b/tests/tests_rafa/generate_sample_data.R
@@ -130,6 +130,9 @@ setDT(df)
 df[, id := 1:nrow(df)]
 head(df)
 
+data.table::setindex(df, NULL)
+data.table::setDF(df)
+
 arrow::write_parquet(df, './inst/extdata/large_sample.parquet')
 
 
diff --git a/tests/tests_rafa/munis_bbox.R b/tests/tests_rafa/munis_bbox.R
index 29473b7..93f79a2 100644
--- a/tests/tests_rafa/munis_bbox.R
+++ b/tests/tests_rafa/munis_bbox.R
@@ -8,34 +8,34 @@ library(geoarrow)
 # Load the state polygons
 df <- geobr::read_municipality(year = 2022, simplified = T)
 
-# Calculate bounding boxes of states
-bounding_boxes <- df |>
-  st_as_sf() |>                           # Ensure df is an sf object
-  rowwise() |>                            # Process each polygon individually
-  mutate(
-    xmin = st_bbox(geometry)["xmin"],      # Extract xmin from the bounding box
-    ymin = st_bbox(geometry)["ymin"],      # Extract ymin from the bounding box
-    xmax = st_bbox(geometry)["xmax"],      # Extract xmax from the bounding box
-    ymax = st_bbox(geometry)["ymax"]       # Extract ymax from the bounding box
-  ) |>
-  ungroup() |>                            # Unrowwise after rowwise operations
-  select(code_muni, xmin, ymin, xmax, ymax) |> # Select desired columns
-  st_drop_geometry()
-
-# View the resulting bounding box data.frame
-head(bounding_boxes)
-
-data.table::fwrite(bounding_boxes, './inst/extdata/munis_bbox.csv')
-
-
-head(input_table)
-
-candidate_states <-
-  subset(x = bounding_boxes,
-         (xmin < bbox_lon_min | xmax > bbox_lon_max) &
-           (ymin < bbox_lat_min | ymax > bbox_lat_max)
-  )
-
+# # Calculate bounding boxes of states
+# bounding_boxes <- df |>
+#   st_as_sf() |>                           # Ensure df is an sf object
+#   rowwise() |>                            # Process each polygon individually
+#   mutate(
+#     xmin = st_bbox(geometry)["xmin"],      # Extract xmin from the bounding box
+#     ymin = st_bbox(geometry)["ymin"],      # Extract ymin from the bounding box
+#     xmax = st_bbox(geometry)["xmax"],      # Extract xmax from the bounding box
+#     ymax = st_bbox(geometry)["ymax"]       # Extract ymax from the bounding box
+#   ) |>
+#   ungroup() |>                            # Unrowwise after rowwise operations
+#   select(code_muni, xmin, ymin, xmax, ymax) |> # Select desired columns
+#   st_drop_geometry()
+#
+# # View the resulting bounding box data.frame
+# head(bounding_boxes)
+#
+# data.table::fwrite(bounding_boxes, './inst/extdata/munis_bbox.csv')
+#
+#
+# head(input_table)
+#
+# candidate_states <-
+#   subset(x = bounding_boxes,
+#          (xmin < bbox_lon_min | xmax > bbox_lon_max) &
+#            (ymin < bbox_lat_min | ymax > bbox_lat_max)
+#   )
+#
 
 
 
@@ -65,17 +65,23 @@ bounding_boxes <- bounding_boxes |>
 
 head(bounding_boxes)
 
-# arrow::write_parquet(bounding_boxes2, "munis_bbox_2022.parquet")
-arrow::write_parquet(bounding_boxes, "munis_bbox_2022.parquet",
-                     compression='zstd',
-                     compression_level = 7)
+# # arrow::write_parquet(bounding_boxes2, "munis_bbox_2022.parquet")
+# arrow::write_parquet(bounding_boxes, "munis_bbox_2022.parquet",
+#                      compression='zstd',
+#                      compression_level = 7)
+#
+
+# remove the classes "tbl_df" "tbl" from an object
+class(bounding_boxes) <- setdiff(class(bounding_boxes), c("tbl_df", "tbl"))
+
+duckspatial::ddbs_write_dataset(
+  data = bounding_boxes,
+  path = './inst/extdata/munis_bbox_2022.parquet',
+  crs = "EPSG:4674",
+  overwrite = T,
+  parquet_compression = "ZSTD",
+  quiet = TRUE
+)
 
 
-path <- "//storage6/usuarios/Proj_acess_oport/git_rafa/prep_roger/data/municipality/2022/municipalities_2022_simplified.parquet"
-df <- arrow::open_dataset(path) |>
-  dplyr::select(code_muni, geometry) |>
-  sf::st_as_sf()
 
-arrow::write_parquet(df, "munis_2022.parquet",
-                     compression='zstd',
-                     compression_level = 7)
diff --git a/tests/tests_rafa/reverse_geocode_tests.R b/tests/tests_rafa/reverse_geocode_tests.R
index db51853..3ce2130 100644
--- a/tests/tests_rafa/reverse_geocode_tests.R
+++ b/tests/tests_rafa/reverse_geocode_tests.R
@@ -1,6 +1,7 @@
 devtools::load_all('.')
+# library(geocodebr)
 library(dplyr)
-library(geoarrow)
+library(sf)
 
 
 # input data
@@ -20,21 +21,19 @@ bench::system_time(
    dist_max = 1000
    )
 )
+
 View(out)
 
 
 # ttt <- data.frame(id=1, lat=-15.814192047159876, lon=-47.90534614672923)
-# reverse_geocode(df = ttt)
-
-# take aways
-# ok reverse_geocode_filter  # mais rapdido e eficiente, mas sem progress bar
-# ok reverse_geocode_join    # igual o _filter, mas usa join
-# ok reverse_geocode_hybrid  # com progress bar mas um pouco mais lento e bem mais memoria
-# ok reverse_geocode_arrow   # tempo igual a _hybrid, mas usa bem mais memoria
-# ok           filterloop    # disparado o mais lento, com progress e memoria media
+# ttt <- sf::st_as_sf(
+#   ttt,
+#   coords = c("lon", "lat"),
+#   crs = 4674
+# )
+#
+# geocode_reverso(pontos = ttt)
 
-# essa funcao pode fica muito mais rapida / eficiente se usarmos a biblioteca de
-# dados espaciais do duckdb
 
 b5 <- bench::mark(
   current = geocode_reverso(pontos = pontos, dist_max = 1000),
@@ -43,322 +42,13 @@ b5 <- bench::mark(
 )
 
 b5
-#
-
-# # 500 pontos
-#     expression           min median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result memory
-#     <bch:expr>        <bch:> <bch:>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm> <list> <list>
-#   1 duck_filter4       1.54m  1.62m   0.0101    221.5MB  0.0423      5    21      8.28m <NULL> <Rprofmem>
-#   2 duck_filter_loop4  5.35m  6.88m   0.00255    14.5MB  0.00357     5     7      32.7m <NULL> <Rprofmem>
-#   3 hybrid4            2.41m  2.47m   0.00663    34.5MB  0.302       5   228     12.56m <NULL> <Rprofmem>
-
-# # 1000 pontos
-#     expression   min median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result memory
-#     <bch:expr> <bch> <bch:>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm> <list> <list>
-#   1 duck_filt… 2.97m  2.97m   0.00560    11.3MB    0         1     0      2.97m <NULL> <Rprofmem>
-#   2 duck_join4 3.03m  3.03m   0.00550    11.5MB    0         1     0      3.03m <NULL> <Rprofmem>
-#   3 arrow4     4.27m  4.27m   0.00391   240.1MB    0.316     1    81      4.27m <NULL> <Rprofmem>
-#   4 hybrid4    4.24m  4.24m   0.00393   122.4MB    0.110     1    28      4.24m <NULL> <Rprofmem>
-#   4 filterloop 10.7m 11.27m   0.00146    19.8MB  0.00195     3     4     34.19m <NULL> <Rprofmem> <bench_tm [3]> <tibble>
-# 1 current      1.92s  1.98s     0.496  247.62MB    3.17      5    32     10.08s <NULL> <Rprofmem [22,026 × 3]> <bench_tm [5]> <tibble>
-# 2 geocrev2     1.28s  1.33s     0.730    8.03MB    0.730     5     5      6.85s <NULL> <Rprofmem [13,876 × 3]> <bench_tm [5]> <tibble>
-# 3 geocrev3     1.38s  1.61s     0.626    8.29MB    0.752     5     6      7.98s <NULL> <Rprofmem [15,238 × 3]> <bench_tm [5]> <tibble>
-#
-
-
-
-# aternativas da funcao de geocode reverso -----------------------------------------------------------
-
-#' muni join + haversine
-#' (usa spatial join para detectar munis candidatos, e depois calcular haversine na unha)
-#' a diferenca dessa para a versao implementada é que a implementada calcula distancias dentro do duckspatial com ST_DIST
-
-geocode_reverso2 <- function(
-    pontos,
-    dist_max = 1000,
-    verboso = TRUE,
-    cache = TRUE,
-    n_cores = NULL
-) {
-  # check input
-  checkmate::assert_class(pontos, 'sf')
-  checkmate::assert_number(dist_max, lower = 500, upper = 100000) # max 100 Km
-  checkmate::assert_logical(verboso)
-  checkmate::assert_logical(cache)
-
-  # check if geometry type is POINT
-  if (any(sf::st_geometry_type(pontos) != 'POINT')) {
-    cli::cli_abort(
-      "Input precisa ser um sf data frame com geometria do tipo POINT."
-    )
-  }
-
-  epsg <- sf::st_crs(pontos)$epsg
-  if (epsg != 4674) {
-    cli::cli_abort(
-      "Dados de input precisam estar com sistema de coordenadas geogr\u00e1ficas SIRGAS 2000, EPSG 4674."
-    )
-  }
-
-  # prep input -------------------------------------------------------
-
-  # converte para data.frame
-  coords <- sfheaders::sf_to_df(pontos, fill = TRUE)
-  data.table::setDT(coords)
-  coords[, c('sfg_id', 'point_id') := NULL]
-  data.table::setnames(coords, old = c('x', 'y'), new = c('lon', 'lat'))
-
-  # create temp id
-  coords[, tempidgeocodebr := 1:nrow(coords)]
-
-  # convert max_dist to degrees
-  # 1 degree of latitude is always 111320 meters
-  margin_lat <- dist_max / 111320
-
-  # 1 degree of longitude is 111320 * cos(lat)
-  coords[, c("lat_min", "lat_max") := .(lat - margin_lat, lat + margin_lat)]
-
-  coords[,
-         c("lon_min", "lon_max") := .(
-           lon - dist_max / 111320 * cos(lat),
-           lon + dist_max / 111320 * cos(lat)
-         )
-  ]
-
-  # get bounding box around input points
-  # using a range of max dist around input points
-  bbox_lat_min <- min(coords$lat_min)
-  bbox_lat_max <- max(coords$lat_max)
-  bbox_lon_min <- min(coords$lon_min)
-  bbox_lon_max <- max(coords$lon_max)
-
-  # check if input falls within Brazil
-  bbox_brazil <- data.frame(
-    xmin = -73.99044997,
-    ymin = -33.75208127,
-    xmax = -28.83594354,
-    ymax = 5.27184108
-  )
-
-  error_msg <- 'Coordenadas de input localizadas fora do bounding box do Brasil.'
-  if (
-    bbox_lon_min < bbox_brazil$xmin |
-    bbox_lon_max > bbox_brazil$xmax |
-    bbox_lat_min < bbox_brazil$ymin |
-    bbox_lat_max > bbox_brazil$ymax
-  ) {
-    cli::cli_abort(error_msg)
-  }
-
-  # download cnefe  -------------------------------------------------------
-
-  # downloading cnefe
-  cnefe_dir <- geocodebr::download_cnefe(
-    tabela = 'municipio_logradouro_numero_cep_localidade',
-    verboso = verboso,
-    cache = cache
-  )
-
-  # creating a temporary db and register the input table data
-  con <- create_geocodebr_db(n_cores = n_cores)
-
-
-  # limita escopo de busca aos municipios  -------------------------------------------------------
-  # determine potential municipalities
-  munis <- system.file("extdata/munis_bbox_2022.parquet", package = "geocodebr") |>
-    arrow::open_dataset() |>
-    sf::st_as_sf()
-  # munis_path <- system.file("extdata/munis_2022.parquet", package = "geocodebr")
-  #
-  # query_register_muni <- glue::glue(
-  #   "CREATE OR REPLACE TEMP VIEW munis AS
-  #       SELECT *,
-  #       geometry::GEOMETRY AS geometry
-  #   FROM read_parquet('{munis_path}');"
-  # )
-  #
-  # DBI::dbExecute(conn, query_register_muni)
-
-  potential_munis <- duckspatial::ddbs_join(
-    x = pontos,
-    y = munis,
-    join = "within",
-    quiet = TRUE
-  ) |>
-    dplyr::pull(code_muni) |>
-    unique()
-
-  potential_munis <- enderecobr::padronizar_municipios(potential_munis)
-
-  # lida com munis com apostrofe no nome tipo Olho d'agua
-  potential_munis <- gsub("'", "''", potential_munis, fixed = TRUE)
-
-  unique_munis <- paste(glue::glue("'{potential_munis}'"), collapse = ",")
-
-  # build path to local file
-  path_to_parquet <- fs::path(
-    listar_pasta_cache(),
-    glue::glue("geocodebr_data_release_{data_release}"),
-    paste0("municipio_logradouro_numero_cep_localidade.parquet")
-  )
-
-  # create filtered_cnefe table, filter on the fly
-  cols_to_keep <- c(
-    "estado",
-    "municipio",
-    "logradouro",
-    "numero",
-    "cep",
-    "localidade",
-    "endereco_completo",
-    "lon",
-    "lat"
-  )
-  cols_to_keep <- paste0(cols_to_keep, collapse = ", ")
-
-  # Load CNEFE data and filter it to include only municipalities
-  # present in the input table, reducing the search scope
-  # Narrow search global scope of cnefe to bounding box
-  query_filter_cnefe <- glue::glue(
-    "CREATE TEMP VIEW filtered_cnefe AS
-        SELECT {cols_to_keep}
-        FROM read_parquet('{path_to_parquet}') m
-          WHERE m.municipio IN ({unique_munis});"
-  )
-
-  DBI::dbExecute(con, query_filter_cnefe)
-  # DBI::dbExecute(con, query_filter_cnefe)
-  # b <- DBI::dbReadTable(con, "filtered_cnefe")
-
-  # Convert input data frame to DuckDB table
-  duckdb::dbWriteTable(con, "input_table_db", coords, temporary = TRUE)
 
-  # Haversine macro (kept for speed; consider spatial extension later)
-  DBI::dbExecute(
-    con,
-    "
-    CREATE MACRO IF NOT EXISTS haversine(lat1, lon1, lat2, lon2) AS (
-      6378137 * 2 * ASIN(
-        SQRT(
-          POWER(l(lat2 - lat1) / 2), 2) +
-          COS(RADIANS(lat1)) * COS(RADIANS(lat2)) *
-          POWER(SIN(RADIANS(lon2 - lon1) / 2), 2)
-        )
-      )
-    );
-  "
-  )
+## 500 pontos
+#    expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result memory
+#  v0.6.2           2.9s    3.02s     0.330     153MB     1.39     5    21      15.1s <NULL>
+#  v0.6.2.9000      2.4s    2.49s     0.393    4.72MB    0.708     5     9      12.7s <NULL>
 
-  # TO OPTMIZE 666666666666666666666 -------------------------------------------
-  # 1.1) calc dist mantendo apenas tempidgeocodebr
-  # 1.2) calc de dist sucessivo: primeiro 1k, dpeois a distancia q o usuario passa
-  # 1.3) dist usando duckspatial
-  # 2) depois dar um left join do input_table_db com resultado da dist para retornar input original
-
-
-  # Find cases nearby -------------------------------------------------------
-  query_filter_cases_nearby <- glue::glue(
-    "WITH dist_data AS (
-        SELECT
-              input_table_db.* EXCLUDE (lon_min, lon_max, lat_min, lat_max),
-              filtered_cnefe.endereco_completo,
-              filtered_cnefe.estado,
-              filtered_cnefe.municipio,
-              filtered_cnefe.logradouro,
-              filtered_cnefe.numero,
-              filtered_cnefe.cep,
-              filtered_cnefe.localidade,
-              haversine(
-                    input_table_db.lat, input_table_db.lon,
-                    filtered_cnefe.lat, filtered_cnefe.lon
-              ) AS distancia_metros
-        FROM
-              input_table_db, filtered_cnefe
-        WHERE
-              input_table_db.lat_min < filtered_cnefe.lat
-          AND input_table_db.lat_max > filtered_cnefe.lat
-          AND input_table_db.lon_min < filtered_cnefe.lon
-          AND input_table_db.lon_max > filtered_cnefe.lon
-    ),
-
-    ranked AS (
-        SELECT
-            *,
-            RANK() OVER (
-                PARTITION BY tempidgeocodebr
-                ORDER BY distancia_metros ASC
-            ) AS ranking
-        FROM dist_data
-    )
-
-    SELECT * EXCLUDE(tempidgeocodebr, ranking)
-    FROM ranked
-    WHERE ranking = 1;"
-  )
-
-
-
-  output <- DBI::dbGetQuery(con, query_filter_cases_nearby)
-
-  # TODO 6666666
-  if (nrow(output)==0){
-    stop("Nenhum endereco proximo foi encontrados")
-  }
-
-  # organize output -------------------------------------------------
-
-  # convert df to simple feature
-  output_sf <- sfheaders::sf_point(
-    obj = output,
-    x = 'lon',
-    y = 'lat',
-    keep = TRUE
-  )
-
-  sf::st_crs(output_sf) <- 4674
-
-  duckdb::dbDisconnect(con)
-
-  return(output_sf)
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-pontos <- readRDS(
-  system.file("extdata/pontos.rds", package = "geocodebr")
-)
-
-bench::mark(
-  # duck_filter1 = reverse_geocode_filter(coordenadas = pontos, dist_max = 2000, n_cores = 1),
-  # duck_filter8 = reverse_geocode_filter(coordenadas = pontos, dist_max = 2000, n_cores = 8),
-  # duck_join1 =  reverse_geocode_join(coordenadas = pontos, dist_max = 2000, n_cores = 1),
-  # duck_join8 =  reverse_geocode_join(coordenadas = pontos, dist_max = 2000, n_cores = 8),
-  hybrid1 = reverse_geocode_hybrid(coordenadas = pontos, dist_max = 2000, n_cores = 1),
-  hybrid8 = reverse_geocode_hybrid(coordenadas = pontos, dist_max = 2000, n_cores = 8),
-  iterations = 5,
-  check = F
-)
-
-# 1000 pontos
-#     expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result memory
-#     <bch:expr>   <bch:>   <bch:>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm> <list> <list>
-# 1 duck_filter1    3.19m    3.31m   0.00490      75MB  0.0108      5    11        17m <NULL> <Rprofmem>
-# 2 duck_filter8    2.93m    3.04m   0.00516      51MB  0.00723     5     7      16.1m <NULL> <Rprofmem>
-#
-# 1 duck_join1      2.93m    3.54m   0.00475    76.2MB  0.00854     5     9      17.6m <NULL> <Rprofmem>
-# 2 duck_join8      3.62m    4.05m   0.00407    51.2MB  0.00651     5     8      20.5m <NULL> <Rprofmem>
-#
-# 1 hybrid1         5.13m     5.9m   0.00277    88.3MB    0.262     5   473      30.1m <NULL> <Rprofmem>
-# 2 hybrid8         5.09m    5.19m   0.00321      66MB    0.307     5   478      25.9m <NULL> <Rprofmem>
+## 1000 pontos
+#   expression       min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result
+#       v0.6.2     4.05s    4.66s     0.217     161MB     1.04     5    24        23s <NULL>
+#  v0.6.2.9000     3.15s    4.11s     0.236      95MB    0.378     5     8      21.2s <NULL>
diff --git a/tests/testthat/test-geocode.R b/tests/testthat/test-geocode.R
index 8d7fb0d..f83943f 100644
--- a/tests/testthat/test-geocode.R
+++ b/tests/testthat/test-geocode.R
@@ -70,6 +70,72 @@ test_that("expected output", {
 })
 
 
+
+test_that("partial address in the input", {
+
+  df_parcial <- data.frame(
+    uf        = c("PA", "PA"),
+    municipio = c("Santarem", "Santarem"),
+    cep_estab = c("68005000", "68000000"),
+    stringsAsFactors = FALSE
+  )
+
+  campos_parcial <- geocodebr::definir_campos(
+    estado    = "uf",
+    municipio = "municipio",
+    cep       = "cep_estab"
+  )
+
+  # geocoding with only a subset of the address fields must not raise an error
+  testthat::expect_no_error(
+    tester(enderecos = df_parcial, campos_endereco = campos_parcial)
+  )
+
+  # definir_campos() must fail when a mandatory field (state) is omitted
+  testthat::expect_error(
+    geocodebr::definir_campos(
+      # estado    = "uf",
+      municipio = "municipio"
+    )
+  )
+
+  # must fail when a declared field ("cep_estab") is not a column of the input
+  df_parcial2 <- data.frame(
+    uf        = c("PA", "PA"),
+    municipio = c("Santarem", "Santarem"),
+    #cep_estab = c("68005000", "68000000"),
+    stringsAsFactors = FALSE
+  )
+
+  testthat::expect_error(
+    tester(enderecos = df_parcial2, campos_endereco = campos_parcial)
+  )
+})
+
+
+test_that("precisao Ipea", {
+
+  # field mapping: only state, municipality and CEP are declared
+  campos_cep <- geocodebr::definir_campos(
+    estado    = "uf",
+    municipio = "municipio",
+    cep       = "cep"
+  )
+
+  enderecos_ipea <- data.frame(
+    uf        = c("DF", "RJ"),
+    municipio = c("Brasilia", "Rio de Janeiro"),
+    cep = c("70390-025", "20071-001"),
+    stringsAsFactors = FALSE
+  )
+
+  resultado <- tester(enderecos = enderecos_ipea, campos_endereco = campos_cep)
+  desvio_ok <- resultado$desvio_metros < 115
+  testthat::expect_true(all(desvio_ok))
+})
+
+
+
 test_that("argumento padronizar endereco", {
 
   # erro se input nao estiver padronizado