Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dados-rastreador
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Packages
Packages
Container Registry
Analytics
CI / CD Analytics
Repository Analytics
Value Stream Analytics
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Camex Público
dados-rastreador
Commits
d01621ba
Commit
d01621ba
authored
Jan 22, 2026
by
Miguel Guerrero
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update gera_correlacoes
parent
e5808716
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
116 additions
and
130 deletions
+116
-130
scripts/gera_correlacoes.R
scripts/gera_correlacoes.R
+116
-130
No files found.
scripts/gera_correlacoes.R
View file @
d01621ba
...
@@ -60,7 +60,8 @@ path_descricoes <- file.path(
...
@@ -60,7 +60,8 @@ path_descricoes <- file.path(
Sys.getenv
(
"general"
),
Sys.getenv
(
"general"
),
"Bases"
,
"Bases"
,
"NCM_sh1996_a_sh2022"
,
"NCM_sh1996_a_sh2022"
,
"descricoes_ncm_sh.xlsx"
"raw"
,
"tabela_nomenclatura.parquet"
)
)
...
@@ -117,7 +118,9 @@ write_parquet(correlacao_completa, "dados_gerados/dataset_ncm.parquet")
...
@@ -117,7 +118,9 @@ write_parquet(correlacao_completa, "dados_gerados/dataset_ncm.parquet")
# load(path_naladi_ncm)
# load(path_naladi_ncm)
naladi_ncm_manual
<-
read_xlsx
(
path_naladi_ncm_manual
)
naladi_ncm_manual
<-
read_xlsx
(
path_naladi_ncm_manual
,
guess_max
=
1e6
)
colunas
<-
names
(
naladi_ncm_manual
)
colunas
<-
names
(
naladi_ncm_manual
)
...
@@ -130,21 +133,6 @@ naladi_ncm_manual <- naladi_ncm_manual |>
...
@@ -130,21 +133,6 @@ naladi_ncm_manual <- naladi_ncm_manual |>
select
(
contains
(
ano_max
))
|>
select
(
contains
(
ano_max
))
|>
distinct
()
distinct
()
# correlacao_completa <- correlacao_naladi_ncm |>
# distinct() |>
# rename_with(~str_to_upper(.x))
#
# # obtem ultimo ano
# colunas <- names(correlacao_completa)
#
# ano_max <- colunas |>
# str_extract("[0-9]+") |>
# max() |>
# unique()
#
# naladi_ncm_auto <- correlacao_completa |>
# select(contains(ano_max)) |>
# distinct()
naladi_ncm_manual
|>
naladi_ncm_manual
|>
write_parquet
(
"dados_gerados/dataset_naladi_ncm.parquet"
)
write_parquet
(
"dados_gerados/dataset_naladi_ncm.parquet"
)
...
@@ -178,115 +166,127 @@ write_parquet(correlacao_completa, "dados_gerados/dataset_naladi.parquet")
...
@@ -178,115 +166,127 @@ write_parquet(correlacao_completa, "dados_gerados/dataset_naladi.parquet")
# Processa e organiza descrições NCM --------------------------------------
# Processa e organiza descrições NCM --------------------------------------
descricoes_por_sh
<-
read_excel
(
cria_padrao
<-
function
(
x
)
{
path_descricoes
,
paste0
(
x
,
"$"
,
collapse
=
"|"
)
guess_max
=
1e5
}
)
%>%
rename
(
sh
=
grupo_sh
)
ultimo_sh
<-
descricoes_por_sh
%>%
distinct
(
sh
)
%>%
pull
(
sh
)
%>%
max
()
descricoes_por_sh
<-
descricoes_por_sh
%>%
descricoes_raw
<-
arrow
::
read_parquet
(
path_descricoes
)
|>
# remove o ultimo sh, pois ha ncms que ja deixaram de existir
select
(
filter
(
sh
!=
ultimo_sh
)
%>%
codigo
,
nomeExtenso
,
inicioVigencia
,
inicioVigenciaStr
,
fimVigencia
,
fimVigenciaStr
)
|>
mutate
(
mutate
(
ncm
=
paste0
(
grupo_sh
=
case_when
(
str_sub
(
ncm
,
1
,
4
),
"."
,
str_detect
(
fimVigenciaStr
,
cria_padrao
(
1996
:
2001
))
~
"1996"
,
str_sub
(
ncm
,
5
,
6
),
"."
,
str_detect
(
fimVigenciaStr
,
cria_padrao
(
2002
:
2006
))
~
"2002"
,
str_sub
(
ncm
,
7
,
8
)
str_detect
(
fimVigenciaStr
,
cria_padrao
(
2007
:
2011
))
~
"2007"
,
),
str_detect
(
fimVigenciaStr
,
cria_padrao
(
2012
:
2016
))
~
"2012"
,
sh
=
paste
(
"NCM"
,
sh
)
fimVigencia
==
1648695600000
~
"2017"
,
TRUE
~
"2022"
,
)
)
)
|>
group_by
(
grupo_sh
,
codigo
)
|>
slice_max
(
order_by
=
fimVigencia
,
n
=
1
)
|>
ungroup
()
x
<-
descricoes_raw
|>
filter
(
nchar
(
codigo
)
!=
2
)
|>
select
(
codigo
,
nomeExtenso
,
grupo_sh
)
|>
mutate
(
n
=
nchar
(
codigo
)
)
####################
sh4
<-
x
|>
# # obtem abas disponíveis
filter
(
n
==
4
)
|>
# abas <- readxl::excel_sheets(path_descricoes)
select
(
-
n
)
|>
# # obtem nome da ultima aba
rename
(
sh4
=
codigo
)
|>
# ultimo_sh <- abas[length(abas)]
rename
(
desc_sh4
=
nomeExtenso
)
|>
# # remove ultima aba
mutate
(
desc_sh4
=
str_squish
(
desc_sh4
))
# abas <- abas[-length(abas)]
sh5
<-
x
|>
# append_descricoes <- function(aba) {
filter
(
# read_xlsx(path_descricoes, sheet = aba) %>%
n
==
5
# mutate(
)
|>
# across(
select
(
-
n
)
|>
# everything(),
rename
(
sh5
=
codigo
)
|>
# ~if_else(is.na(.x), "", .x)
rename
(
desc_sh5
=
nomeExtenso
)
|>
# ),
mutate
(
desc_sh5
=
str_squish
(
desc_sh5
))
# across(
# everything(),
sh6
<-
x
|>
# ~str_replace(.x, "^- ", "-")
filter
(
n
==
6
)
|>
# ),
select
(
-
n
)
|>
# across(
rename
(
sh6
=
codigo
)
|>
# everything(),
rename
(
desc_sh6
=
nomeExtenso
)
|>
# ~str_replace(.x, "^-- ", "--")
mutate
(
desc_sh6
=
str_squish
(
desc_sh6
))
# ),
# ) %>%
sh7
<-
x
|>
# transmute(
filter
(
n
==
7
)
|>
# codigoFormatado,
select
(
-
n
)
|>
# sh,
rename
(
sh7
=
codigo
)
|>
# descricao = paste(
rename
(
desc_sh7
=
nomeExtenso
)
|>
# desc_pos,
mutate
(
desc_sh7
=
str_squish
(
desc_sh7
))
# desc_subpos5,
# desc_subpos6,
descricoes_por_sh
<-
x
|>
# desc_item,
filter
(
n
==
8
)
|>
# desc_subitem,
select
(
-
n
)
|>
# sep = "\n"
mutate
(
# )
sh4
=
str_sub
(
codigo
,
1
,
4
),
# ) %>%
sh5
=
str_sub
(
codigo
,
1
,
5
),
# mutate(
sh6
=
str_sub
(
codigo
,
1
,
6
),
# descricao = str_replace_all(
sh7
=
str_sub
(
codigo
,
1
,
7
),
# descricao,
nomeExtenso
=
str_replace
(
# "\n\n\n\n\n|\n\n\n\n|\n\n\n|\n\n",
nomeExtenso
,
# "\n"
"^- "
,
# )
"-"
# ) %>%
# mutate(
# descricao = str_remove(descricao, "\n$")
# )
# }
# descricoes_por_sh <- purrr::map_dfr(
# abas,
# ~ append_descricoes(.x)
# ) %>%
# rename(ncm = codigoFormatado) %>%
# mutate(sh = str_replace(sh, "SH", "NCM"))
descricoes_atuais
<-
readRDS
(
file.path
(
Sys.getenv
(
"general"
),
"Bases"
,
"NCM_sh1996_a_sh2022"
,
"descricoes_atuais.rds"
)
)
)
%>%
)
|>
as_tibble
()
%>%
mutate
(
mutate
(
nomeExtenso
=
str_replace
(
nomeExtenso
,
"^-- "
,
"--"
)
)
|>
left_join
(
sh4
,
by
=
c
(
"sh4"
,
"grupo_sh"
))
|>
left_join
(
sh5
,
by
=
c
(
"sh5"
,
"grupo_sh"
))
|>
left_join
(
sh6
,
by
=
c
(
"sh6"
,
"grupo_sh"
))
|>
left_join
(
sh7
,
by
=
c
(
"sh7"
,
"grupo_sh"
))
|>
select
(
-
matches
(
"^sh"
))
|>
mutate
(
across
(
starts_with
(
"desc_sh"
),
~
str_replace
(
.x
,
"^- "
,
"-"
)
),
across
(
starts_with
(
"desc_sh"
),
~
str_replace
(
.x
,
"^-- "
,
"--"
)
)
)
|>
unite
(
col
=
descricao
,
starts_with
(
"desc_sh"
),
nomeExtenso
,
sep
=
"\n"
,
na.rm
=
TRUE
)
|>
select
(
ncm
=
codigo
,
sh
=
grupo_sh
,
descricao
)
|>
mutate
(
sh
=
paste
(
"NCM"
,
sh
),
ncm
=
paste0
(
ncm
=
paste0
(
str_sub
(
ncm
,
1
,
4
),
str_sub
(
ncm
,
1
,
4
),
"."
,
"."
,
str_sub
(
ncm
,
5
,
6
),
"."
,
str_sub
(
ncm
,
5
,
6
),
"."
,
str_sub
(
ncm
,
7
,
8
)
str_sub
(
ncm
,
7
,
8
)
),
))
sh
=
paste
(
"NCM"
,
ultimo_sh
)
)
%>%
relocate
(
sh
,
.after
=
ncm
)
descricoes_por_sh
<-
descricoes_por_sh
%>%
bind_rows
(
descricoes_atuais
)
%>%
distinct
()
# necessário escapar ' para não dar problema na geração dos tooltips dos
# necessário escapar ' para não dar problema na geração dos tooltips dos
# diagramas
# diagramas
...
@@ -310,21 +310,7 @@ descricoes_por_sh <- descricoes_por_sh %>%
...
@@ -310,21 +310,7 @@ descricoes_por_sh <- descricoes_por_sh %>%
)
)
)
)
# #####################################################
# # ATENÇÃO: Solução provisória para descrições NCM ##
# # Quando houver descrições para todas as NCMs ######
# # remover o código abaixo ##########################
# ####################################################
#
# descricoes_por_sh <- descricoes_por_sh %>%
# mutate(descricao = if_else(
# sh != "NCM 2022",
# " ",
# descricao
# ))
descricoes_por_sh
%>%
descricoes_por_sh
%>%
write_parquet
(
arrow
::
write_parquet
(
"dados_gerados/descricoes_ncm.parquet"
"dados_gerados/descricoes_ncm.parquet"
)
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment