Recently Published
2010_CensoBR_Enrollment_rates_per_decile_GREEN_national_average.R
# 2010_CensoBR_Enrollment_rates_per_decile_GREEN_national_average.R
# ==============================================================================
# DESCRIPTION: TERTIARY EDUCATION ACCESS RATE BY INCOME DECILE
# Brazil Census 2010 | comparable to PNAD series (code 190_1)
#
# INDICATOR: Currently enrolled (V0628 ∈ {1,2} & V0629 ∈ {09-12})
# OR previously attended (V0633 ∈ {11-14})
# Ages 18-24 | Income: household per capita (V6531)
#
# VISUALIZATION:
# - Bars per decile with 95% CI (svyciprop logit)
# - National average in green (same style as PNAD code)
# - Labels and aesthetics in English
# ==============================================================================
library(censobr)
library(arrow)
library(dplyr)
library(Hmisc)
library(survey)
library(plotly)
# --- VISUALIZATION SETTINGS (mirror of PNAD code) ---
COR_NATIONAL <- "#2ca02c" # Strong green — national average
CORES_DECIS <- colorRampPalette(c("#D7191C", "#FFFFBF", "#2B83BA"))(10)
SHOW_CI_NATIONAL <- TRUE # Set FALSE to clean up
SHOW_CI_DECILES <- TRUE # Set FALSE to clean up
# ── 1. NATIONAL INCOME DECILE BREAKS ─────────────────────────────────────────
pop_renda <- read_population(
year = 2010,
columns = c("V6531", "V0010"),
add_labels = NULL,
showProgress = FALSE
) |>
filter(!is.na(V6531)) |>
collect()
q_breaks_full <- wtd.quantile(
x = pop_renda$V6531,
weights = pop_renda$V0010,
probs = seq(0, 1, 0.1),
na.rm = TRUE
)
cat("Income decile breaks (R$):\n"); print(round(q_breaks_full, 2))
rm(pop_renda)
# ── 2. LOAD DATA & FILTER AGES 18-24 ─────────────────────────────────────────
# V6036 = age in completed years
# V0628 = currently attending school (1=public, 2=private)
# V0629 = current course (09-12 = tertiary)
# V0633 = highest course ever attended (11-14 = tertiary)
# V6531 = household per capita income (R$, July 2010)
# V0010 = sampling weight
df_jovens <- read_population(
year = 2010,
columns = c("V6036", "V0628", "V0629", "V0633", "V6531", "V0010"),
add_labels = NULL,
showProgress = TRUE
) |>
filter(
as.integer(V6036) >= 18L,
as.integer(V6036) <= 24L,
!is.na(V6531)
) |>
collect()
# ── 3. BUILD INDICATOR + ASSIGN DECILES ──────────────────────────────────────
#
# Equivalence with PNAD code:
# ens_sup == 1 → currently enrolled → V0628 ∈ {1,2} & V0629 ∈ {09-12}
# ens_sup_a == 1 → previously attended → V0633 ∈ {11-14}
df_jovens <- df_jovens |>
mutate(
decil_renda = cut(
V6531,
breaks = q_breaks_full,
labels = paste0("D", 1:10),
include.lowest = TRUE
),
freq_current = !is.na(V0628) & V0628 %in% c("1", "2") &
!is.na(V0629) & V0629 %in% c("09", "10", "11", "12"),
freq_past = !is.na(V0633) & V0633 %in% c("11", "12", "13", "14"),
sup_total = as.integer(freq_current | freq_past)
) |>
filter(!is.na(decil_renda))
cat("Youth 18-24 in sample (with income):", nrow(df_jovens), "\n")
# ── 4. SURVEY DESIGN + COMPUTE RATES WITH 95% CI ─────────────────────────────
design_censo <- svydesign(ids = ~1, weights = ~V0010, data = df_jovens)
# By decile
taxa_decil <- svyby(
~sup_total, ~decil_renda, design_censo,
svyciprop, method = "logit", vartype = "ci"
) |>
mutate(
decil = as.character(decil_renda),
decil_num = as.numeric(gsub("D", "", decil)),
prop = sup_total * 100,
lower = ci_l * 100,
upper = ci_u * 100,
ano = 2010
)
# National average
taxa_nacional_raw <- svyciprop(~sup_total, design_censo,
method = "logit", vartype = "ci")
taxa_nacional <- data.frame(
decil = "National",
decil_num = 0,
prop = as.numeric(taxa_nacional_raw) * 100,
lower = attr(taxa_nacional_raw, "ci")[1] * 100,
upper = attr(taxa_nacional_raw, "ci")[2] * 100,
ano = 2010
)
cat("\nNational access rate — Census 2010 (ages 18-24):",
sprintf("%.1f%%", taxa_nacional$prop),
"[ 95% CI:", sprintf("%.1f", taxa_nacional$lower), "–",
sprintf("%.1f%%", taxa_nacional$upper), "]\n")
# ── 5. DECILE LABELS (mirror of PNAD code) ───────────────────────────────────
labels_decis <- c(
"Decile 1: bottom 10%",
"Decile 2: 10-20%",
"Decile 3: 20-30%",
"Decile 4: 30-40%",
"Decile 5: 40-50%",
"Decile 6: 50-60%",
"Decile 7: 60-70%",
"Decile 8: 70-80%",
"Decile 9: 80-90%",
"Decile 10: top 10%"
)
# ── 6. PLOTLY CHART ───────────────────────────────────────────────────────────
fig_2010_censo <- plot_ly()
# 6A) DECILE BARS (bottom layer)
for (i in 1:10) {
df_d <- taxa_decil |> filter(decil_num == i)
fig_2010_censo <- fig_2010_censo |>
add_trace(
data = df_d,
x = ~decil_num,
y = ~prop,
type = "bar",
name = labels_decis[i],
marker = list(
color = CORES_DECIS[i],
line = list(color = "white", width = 0.8)
),
error_y = list(
type = "data",
array = ~upper - prop,
arrayminus = ~prop - lower,
visible = SHOW_CI_DECILES,
thickness = 1.5,
width = 4,
color = "gray40"
)
# ,
# hoverinfo = "text",
# text = ~paste0(
# "<b>", labels_decis[i], "</b><br>",
# "<b>Access Rate:</b> ", sprintf("%.1f", prop), "%<br>",
# "<b>CI (95%):</b> [", sprintf("%.1f", lower), "% – ",
# sprintf("%.1f", upper), "%]"
# ),
# showlegend = TRUE
)
}
# 6B) NATIONAL AVERAGE LINE (top layer — same style as PNAD code)
fig_2010_censo <- fig_2010_censo |>
add_trace(
data = taxa_nacional,
x = list(0.5, 10.5), # horizontal line across full chart
y = list(taxa_nacional$prop, taxa_nacional$prop),
type = "scatter",
mode = "lines",
name = paste0("<b>National Average: ",
sprintf("%.1f", taxa_nacional$prop), "%</b>"),
line = list(
color = COR_NATIONAL,
width = 3,
dash = "dash"
),
hoverinfo = "text",
text = paste0(
"<b>★ NATIONAL AVERAGE (18-24) ★</b><br>",
"<b>Access Rate:</b> ", sprintf("%.1f", taxa_nacional$prop), "%<br>",
"<b>CI (95%):</b> [", sprintf("%.1f", taxa_nacional$lower), "% – ",
sprintf("%.1f", taxa_nacional$upper), "%]"
),
showlegend = TRUE
)
# 6C) LAYOUT
fig_2010_censo <- fig_2010_censo |>
layout(
title = list(
text = "<b>Tertiary education access rate by income decile in Brazil, 2010</b>",
x = 0.5,
y = 0.97,
font = list(size = 20)
),
xaxis = list(
title = "<b>Income decile (D1 = poorest · D10 = richest)</b>",
tickmode = "array",
tickvals = 1:10,
ticktext = paste0("D", 1:10),
gridcolor = "#E5E5E5"
),
yaxis = list(
title = "<b>Access rate (%)</b>",
ticksuffix = "%",
range = c(0, 100),
gridcolor = "#E5E5E5"
),
legend = list(
x = 0.02,
y = 0.98,
xanchor = "left",
yanchor = "top",
bgcolor = "rgba(255,255,255,0.85)",
bordercolor = "#CCCCCC",
borderwidth = 1,
font = list(size = 9),
traceorder = "reversed"
),
annotations = list(
list(
x = 0.5,
y = -0.15,
text = paste0(
"Source: Prepared by the author based on Brazil's Demographic Census 2010 (IBGE).",
" Microdata accessed via {censobr} (IPEA).<br>",
"Tertiary access: currently enrolled (V0628 & V0629) OR previously attended (V0633).",
" Ages 18–24. 95% CI via svyciprop (logit)."
),
showarrow = FALSE,
xref = "paper",
yref = "paper",
xanchor = "center",
yanchor = "top",
font = list(size = 11, color = "#555555")
)
),
barmode = "group",
margin = list(t = 90, b = 130, l = 60, r = 40),
plot_bgcolor = "white",
paper_bgcolor = "white",
hovermode = "closest"
)
fig_2010_censo
cat("\n✅ Chart generated: 2010_CensoBR_Enrollment_rates_per_decile_GREEN_national_average\n")
cat("\nADJUSTABLE SETTINGS (lines 20-23):\n")
cat(" • COR_NATIONAL: ", COR_NATIONAL, "\n", sep = "")
cat(" • SHOW_CI_NATIONAL: ", SHOW_CI_NATIONAL, "\n", sep = "")
cat(" • SHOW_CI_DECILES: ", SHOW_CI_DECILES, "\n", sep = "")
220_3_PNAD_Income_per_decile_INDIVIDUAL_TOGGLE_AGGREGATES
# 220_3_PNAD_Income_per_decile_INDIVIDUAL_TOGGLE_AGGREGATES
#==============================================================================
# MELHORIAS EM RELAÇÃO AO 190_2:
# 1) Cada decil é independente na legenda (sem legendgroup compartilhado)
# 2) Grupos agregados: Bottom 40%, Bottom 50%, Bottom 80%
# → Renda média ponderada do grupo inteiro por ano
# 3) Grupos agregados com linha tracejada para distinguir dos decis
#==============================================================================
library(dplyr)
library(survey)
library(Hmisc)
library(plotly)
# --- CONFIGURAÇÕES DE VISUALIZAÇÃO ---
COR_MEDIA_NACIONAL <- "#2ca02c"
LARGURA_MEDIA_NACIONAL <- 4
TAMANHO_MARCADOR_MEDIA <- 12
MOSTRAR_IC_MEDIA_NACIONAL <- TRUE
# Grupos agregados: nome, decis incluídos, cor
GRUPOS_AGREGADOS <- list(
list(nome = "<b>Bottom 40%</b> (D1–D4)", decis = 1:4, cor = "#e377c2", dash = "dash"),
list(nome = "<b>Bottom 50%</b> (D1–D5)", decis = 1:5, cor = "#ff7f0e", dash = "dashdot"),
list(nome = "<b>Bottom 80%</b> (D1–D8)", decis = 1:8, cor = "#9467bd", dash = "dot")
)
# =============================================================================
# 1) PREPARAÇÃO DOS DECIS NACIONAIS
# =============================================================================
pop_decis <- Salata_etal_2025_dataset %>%
group_by(ano) %>%
mutate(
decil_renda = cut(
renda_dom_pcta,
breaks = Hmisc::wtd.quantile(renda_dom_pcta, weights = peso, probs = seq(0, 1, 0.1)),
labels = paste0("D", 1:10),
include_lowest = TRUE
)
) %>%
ungroup() %>%
filter(!is.na(decil_renda)) %>%
mutate(decil_num = as.numeric(gsub("D", "", decil_renda)))
anos_disponiveis <- sort(unique(pop_decis$ano))
# =============================================================================
# 2) CÁLCULO: DECIS + NACIONAL + GRUPOS AGREGADOS
# =============================================================================
lista_decis <- list()
lista_nacional <- list()
# Uma lista por grupo agregado
lista_agr <- lapply(seq_along(GRUPOS_AGREGADOS), function(x) list())
for (a in anos_disponiveis) {
df_ano <- pop_decis %>% filter(ano == a)
design_pop <- svydesign(ids = ~1, weights = ~peso, data = df_ano)
# --- Por decil ---
renda_decil <- svyby(~renda_dom_pcta, ~decil_renda, design_pop, svymean, vartype = "ci")
renda_decil <- renda_decil %>%
mutate(ano = a, decil = as.character(decil_renda),
media = renda_dom_pcta, lower = ci_l, upper = ci_u)
lista_decis[[as.character(a)]] <- renda_decil
# --- Nacional ---
m_nac <- svymean(~renda_dom_pcta, design_pop)
ci_nac <- confint(m_nac)
lista_nacional[[as.character(a)]] <- data.frame(
ano = a,
media = as.numeric(m_nac["renda_dom_pcta"]),
lower = ci_nac["renda_dom_pcta", "2.5 %"],
upper = ci_nac["renda_dom_pcta", "97.5 %"]
)
# --- Grupos agregados ---
for (g in seq_along(GRUPOS_AGREGADOS)) {
df_grupo <- df_ano %>% filter(decil_num %in% GRUPOS_AGREGADOS[[g]]$decis)
design_grupo <- svydesign(ids = ~1, weights = ~peso, data = df_grupo)
m_g <- svymean(~renda_dom_pcta, design_grupo)
ci_g <- confint(m_g)
lista_agr[[g]][[as.character(a)]] <- data.frame(
ano = a,
media = as.numeric(m_g["renda_dom_pcta"]),
lower = ci_g["renda_dom_pcta", "2.5 %"],
upper = ci_g["renda_dom_pcta", "97.5 %"]
)
}
}
dados_decis <- bind_rows(lista_decis) %>%
mutate(decil_num = as.numeric(gsub("D", "", decil)))
dados_nacional <- bind_rows(lista_nacional)
dados_agr <- lapply(lista_agr, bind_rows)
# =============================================================================
# 3) RÓTULOS E CORES
# =============================================================================
labels_decis <- c(
"Decile 1: bottom 10%", "Decile 2: 10–20%", "Decile 3: 20–30%",
"Decile 4: 30–40%", "Decile 5: 40–50%", "Decile 6: 50–60%",
"Decile 7: 60–70%", "Decile 8: 70–80%", "Decile 9: 80–90%",
"Decile 10: top 10%"
)
cores <- colorRampPalette(c("#D7191C", "#FFFFBF", "#2B83BA"))(10)
# =============================================================================
# 4) CONSTRUÇÃO DO GRÁFICO
# =============================================================================
fig_220_3 <- plot_ly()
# ── 4A) DECIS INDIVIDUAIS ─────────────────────────────────────────────────────
# CHAVE: SEM legendgroup compartilhado → cada trace é independente na legenda
for (i in 1:10) {
df_d <- dados_decis %>% filter(decil_num == i)
fig_220_3 <- fig_220_3 %>%
add_trace(
data = df_d, x = ~ano, y = ~media,
type = 'scatter', mode = 'lines+markers',
name = labels_decis[i],
line = list(color = cores[i], width = 2.5),
marker = list(color = cores[i], size = 7),
error_y = list(
type = "data", array = ~upper - media, arrayminus = ~media - lower,
visible = TRUE, thickness = 1, width = 2, color = cores[i]
),
hoverinfo = "text",
text = ~paste0(
"<b>Year:</b> ", ano, "<br>",
"<b>", labels_decis[i], "</b><br>",
"<b>Mean Income:</b> R$ ", formatC(media, format = "f", digits = 0, big.mark = ","), "<br>",
"<b>CI (95%):</b> [R$ ", formatC(lower, format = "f", digits = 0, big.mark = ","),
" – R$ ", formatC(upper, format = "f", digits = 0, big.mark = ","), "]"
),
# ↓ legendgroup ÚNICO por decil → toggle individual
legendgroup = paste0("decil_", i),
showlegend = TRUE
)
}
# ── 4B) GRUPOS AGREGADOS (linha tracejada, marcador triângulo) ────────────────
for (g in seq_along(GRUPOS_AGREGADOS)) {
grp <- GRUPOS_AGREGADOS[[g]]
df_g <- dados_agr[[g]]
fig_220_3 <- fig_220_3 %>%
add_trace(
data = df_g, x = ~ano, y = ~media,
type = 'scatter', mode = 'lines+markers',
name = grp$nome,
line = list(color = grp$cor, width = 3, dash = grp$dash),
marker = list(color = grp$cor, size = 9, symbol = "triangle-up",
line = list(color = "white", width = 1)),
error_y = list(
type = "data", array = ~upper - media, arrayminus = ~media - lower,
visible = TRUE, thickness = 1.5, width = 3, color = grp$cor
),
hoverinfo = "text",
text = ~paste0(
"<b>Year:</b> ", ano, "<br>",
"<b>", grp$nome, "</b><br>",
"<b>Mean Income:</b> R$ ", formatC(media, format = "f", digits = 0, big.mark = ","), "<br>",
"<b>CI (95%):</b> [R$ ", formatC(lower, format = "f", digits = 0, big.mark = ","),
" – R$ ", formatC(upper, format = "f", digits = 0, big.mark = ","), "]"
),
legendgroup = paste0("agregado_", g),
showlegend = TRUE
)
}
# ── 4C) MÉDIA NACIONAL (camada superior) ──────────────────────────────────────
fig_220_3 <- fig_220_3 %>%
add_trace(
data = dados_nacional, x = ~ano, y = ~media,
type = 'scatter', mode = 'lines+markers',
name = "<b>National Average</b>",
line = list(color = COR_MEDIA_NACIONAL, width = LARGURA_MEDIA_NACIONAL),
marker = list(color = COR_MEDIA_NACIONAL, size = TAMANHO_MARCADOR_MEDIA,
symbol = "diamond", line = list(color = "white", width = 1.5)),
error_y = list(
type = "data", array = ~upper - media, arrayminus = ~media - lower,
visible = MOSTRAR_IC_MEDIA_NACIONAL, thickness = 2, width = 3,
color = COR_MEDIA_NACIONAL
),
hoverinfo = "text",
text = ~paste0(
"<b>Year:</b> ", ano, "<br>",
"<b>★ NATIONAL AVERAGE ★</b><br>",
"<b>Mean Income:</b> R$ ", formatC(media, format = "f", digits = 0, big.mark = ","), "<br>",
"<b>CI (95%):</b> [R$ ", formatC(lower, format = "f", digits = 0, big.mark = ","),
" – R$ ", formatC(upper, format = "f", digits = 0, big.mark = ","), "]"
),
legendgroup = "nacional",
showlegend = TRUE
)
# =============================================================================
# 5) LAYOUT
# =============================================================================
fig_220_3 <- fig_220_3 %>%
layout(
title = list(
text = "<b>Mean per capita household income by income decile in Brazil, 1992–2022</b>",
x = 0.5, y = 0.97, font = list(size = 20)
),
xaxis = list(
title = "<b>Year</b>", gridcolor = "#E5E5E5",
tickmode = "array", tickvals = anos_disponiveis,
tickangle = -45, font = list(size = 10)
),
yaxis = list(
title = "<b>Mean per capita household income (BRL)</b>",
tickprefix = "R$ ", gridcolor = "#E5E5E5"
),
legend = list(
x = 0.01, y = 0.99,
xanchor = "left", yanchor = "top",
bgcolor = 'rgba(255,255,255,0.85)',
bordercolor = "#CCCCCC", borderwidth = 1,
font = list(size = 9),
traceorder = "reversed"
),
annotations = list(list(
x = 0.5, y = -0.15,
text = "Source: Prepared by the author based on PNAD/PNAD-C (IBGE) data, harmonized by Salata et al. (2025).<br>Article, code and data available at doi:10.7910/DVN/UZ2SHW",
showarrow = FALSE, xref = 'paper', yref = 'paper',
xanchor = 'center', yanchor = 'top',
font = list(size = 14, color = "#555555")
)),
margin = list(t = 90, b = 120, l = 80, r = 40),
plot_bgcolor = "white",
paper_bgcolor = "white",
hovermode = "closest"
)
fig_220_3
cat("\n✅ Gráfico 220_3 gerado!\n")
cat("\nMELHORIAS IMPLEMENTADAS:\n")
cat("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
cat("✓ Toggle INDIVIDUAL por decil (legendgroup único por trace)\n")
cat("✓ Bottom 40% (D1–D4) → rosa, linha tracejada, triângulo\n")
cat("✓ Bottom 50% (D1–D5) → laranja, dash-dot, triângulo\n")
cat("✓ Bottom 80% (D1–D8) → roxo, dotted, triângulo\n")
cat("✓ Agregados calculados com svymean() no subconjunto dos decis\n")
Brazil Primary Enrollment Rates: Multi-Source Comparison (1857-2015) Paglayan, Kang & PNAD
# Brazil Primary Enrollment Rates: Multi-Source Historical Comparison (1857-2015)
Prepared using Claude Sonnet 4.5
This dataset combines three complementary sources of primary and elementary school enrollment rates in Brazil, offering comprehensive historical coverage spanning over 150 years of educational development.
## Data Sources
### 1. Paglayan (APSR) | 1857-2010 (154 years)
**Metric**: Primary enrollment ratio
**Calculation**:
- Numerator: Students enrolled in primary education
- Denominator: Population aged 5-14 years
**Source**:
Paglayan, A. S. (2021). The Non-Democratic Roots of Mass Education: Evidence from 200 Years. *American Political Science Review*, 115(1), 179–198.
https://doi.org/10.1017/s0003055420000647
**Coverage**: 154 years of historical data (1857-2010)
**Notes**: Longest time series, captures 19th and 20th century educational expansion
---
### 2. Kang et al. (FGV-IBRE) | 1933-2010 (78 years)
**Metric**: Gross enrollment rate - Elementary education (*Ensino Fundamental*)
**Calculation**:
- Numerator: Students enrolled in elementary education (grades 1-9, encompassing both primary and lower secondary levels)
- Denominator: Population aged 7-14 years
**Source**:
Kang, Thomas; Felix, Nilson; Paese, Luís (2021). Late and unequal: enrolments and retention in Brazilian education, 1933-2010. *Revista de História Económica / Journal of Iberian and Latin American Economic History*, 39(2), 191-218.
**Methodological Note**: https://ibre.fgv.br/observatorio-produtividade/artigos/relatorio-metodologico-dados-historicos-de-educacao-no-brasil
(Last updated: May 15, 2023)
**Coverage**: 78 years of data (1933-2010)
**Notes**: Most comprehensive institutional source; includes both primary and lower secondary enrollment
---
### 3. PNAD (IBGE via Base dos Dados) | 1992-2015 (20 years)
**Metric**: Net school attendance rate
**Calculation**:
- Numerator: Individuals aged 5-14 currently attending school (`frequenta_escola == "sim"`)
- Denominator: Total population aged 5-14 years
**Source**:
Pesquisa Nacional por Amostra de Domicílios (PNAD), Instituto Brasileiro de Geografia e Estatística (IBGE), accessed via Base dos Dados.
**Coverage**: 20 years of survey data (1992-2015)
**Data Gaps**: Missing years: 1994, 2000, 2010 (PNAD was not conducted in these years)
**Notes**: Household survey data; provides contemporary validation for historical sources
---
## Key Methodological Differences
| Dimension | Paglayan | Kang et al. | PNAD |
|-----------|----------|-------------|------|
| **Age Range** | 5-14 years | 7-14 years | 5-14 years |
| **Educational Level** | Primary only | Elementary (primary + lower secondary) | School attendance (any level) |
| **Rate Type** | Enrollment ratio | Gross enrollment rate | Net attendance rate |
| **Can exceed 100%?** | Yes | Yes (due to grade repetition) | No |
| **Time Period** | 1857-2010 | 1933-2010 | 1992-2015 (gaps: 1994, 2000, 2010) |
### Important Considerations
1. **Different age ranges**: Paglayan and PNAD use population aged 5-14, while Kang et al. use 7-14. This affects comparability, especially in early childhood enrollment.
2. **Educational scope**:
- Paglayan captures primary education specifically
- Kang et al. includes both primary (grades 1-5) and lower secondary (grades 6-9)
- PNAD measures school attendance regardless of level
3. **Gross vs. Net rates**:
- Paglayan and Kang report gross enrollment (can exceed 100% when students are older/younger than official age)
- PNAD reports net attendance (bounded at 100%)
4. **Historical context**: Brazil's educational system underwent major reforms, including:
- Creation of *Ensino Fundamental* (8 years) in 1971
- Extension to 9 years in 2006
- These changes affect cross-source comparability
## Citation
If using this combined dataset, please cite all three sources:
**Paglayan:**
Paglayan, A. S. (2021). The Non-Democratic Roots of Mass Education: Evidence from 200 Years. *American Political Science Review*, 115(1), 179–198. https://doi.org/10.1017/s0003055420000647
**Kang et al.:**
Kang, T., Felix, N., & Paese, L. (2021). Late and unequal: enrolments and retention in Brazilian education, 1933-2010. *Revista de História Económica / Journal of Iberian and Latin American Economic History*, 39(2), 191-218.
**PNAD:**
Instituto Brasileiro de Geografia e Estatística (IBGE). Pesquisa Nacional por Amostra de Domicílios (PNAD), 1992-2015. Accessed via Base dos Dados.
---
**Compiled by**: Tales Mançano
**Last Updated**: January 22, 2026
Prepared using Claude Sonnet 4.5
Comparação de Séries Históricas de Escolarização no Brasil: Paglayan (APSR) vs. Kang et al.
Este gráfico apresenta uma análise comparativa de duas séries históricas fundamentais para o estudo da educação no Brasil, evidenciando como diferentes escolhas metodológicas impactam a percepção do avanço educacional:
Metodologia de Paglayan (2024): A autora utiliza como métrica a matrícula no ensino primário dividida pela população total na faixa de 5 a 14 anos. Como apontado na discussão, essa escolha impõe um "vínculo" rígido ao denominador: se um estudante conclui o primário e avança para o ensino secundário antes dos 14 anos, ele deixa de ser contabilizado no numerador (matrícula primária), mas permanece no denominador (população 5-14). Isso tende a subestimar a cobertura real em sistemas onde a progressão para o ensino médio é mais rápida ou onde a estrutura do ensino primário é mais curta que o intervalo de 10 anos da base populacional.
Metodologia de Kang et al. (2021): Os autores calculam a Taxa Bruta de Escolarização (Gross Enrollment Ratio - GER) para o Ensino Fundamental. A GER é definida como o total de matrículas no nível de ensino (independentemente da idade do aluno) dividido pela população na idade teórica correspondente a esse nível. No caso brasileiro, o uso da GER permite que a taxa ultrapasse 100% (conforme visível no gráfico a partir de 1990), capturando de forma mais precisa o esforço de atendimento do Estado, inclusive de alunos com distorção idade-série e repetentes que ainda estão cursando o nível primário/fundamental.
Paglayan 2022 - Education or Indoctrination Primary School Enrollment Rate acording to her dataser
Paglayan 2022 - Education or Indoctrination Primary School Enrollment Rate acording to her dataser
Brazil_Count_Enrollment_Tertiary_Education_Public_vs_Private
Compilation of various data source, including the Anuário Estatístico Brasileiro, MaduroJunior2007, Durham2003, Kang Paese 2003, and INEP
Public vs. Private Number of Enrollments in Primary + Secondary Scholl in Brazil (1959-2007). Source: MaduroJunior20007
Public vs. Private Number of Enrollments in Primary + Secondary Scholl in Brazil (1959-2007). Source: MaduroJunior20007