--- title:  "Descriptive Statistics from the SK Multicultural Data V2" format:     html:     self-contained: true     number-sections: true     toc: true     toc-location: left     toc-depth: 3     code-fold: false     collapse: true     code-tools: true editor:  visual --- ```{r set_environment} #| echo: FALSE #| results: "hide" #| message: FALSE library(tidyverse) library(plotly) library(haven) setwd("~/Dropbox/MEA-THL/Multicultural/code") ``` ## Using Multicultural Household Survey {.panel-tabset .panel-tabset-closed} ```{r load} #| echo: FALSE #| message: FALSE fam_2015 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/가구구성원_2015.dta")%>%   rename_with(.fn = ~ ifelse(.x == "v1", .x, paste0(.x, "_fam"))) %>%    rename(hhid = v3_fam) fam_2018 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/가구구성원_2018.dta")%>%   rename_with(.fn = ~ ifelse(.x == c("v1", "v2"), .x, paste0(.x, "_fam"))) fam_2021 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/가구구성원_2021.dta")%>%   rename_with(.fn = ~ ifelse(.x == "v1", .x, paste0(.x, "_fam"))) tab_2015 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/가구구성표_2015.dta")%>%   rename_with(.fn = ~ ifelse(.x == "v1", .x, paste0(.x, "_tab")))%>%    rename(hhid = v1) tab_2018 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/가구구성표_2018.dta")%>%   rename_with(.fn = ~ ifelse(.x == "v1", .x, paste0(.x, "_tab"))) tab_2021 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/가구구성표_2021.dta")%>%   rename_with(.fn = ~ ifelse(.x == "v1", .x, paste0(.x, "_tab"))) self_2015 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/본인_2015.dta")%>%   rename_with(.fn = ~ ifelse(.x == "v1", .x, paste0(.x, "_self")))%>%    rename(hhid = v2_self) self_2018 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/본인_2018.dta")%>%   rename_with(.fn = ~ ifelse(.x == c("v1", "v2"), .x, paste0(.x, "_self"))) self_2021 <- read_dta("/Users/tlim/Dropbox/Data/전국다문화가족실태조사/본인_2021.dta")%>%   rename_with(.fn = ~ ifelse(.x == "v1", .x, paste0(.x, "_self"))) ``` ```{r merge} #| echo: false #| message: FALSE df_2015 <- full_join(fam_2015, tab_2015) %>%    full_join(self_2015) %>%    mutate(wave="2015") %>%    rename(id = v1,          relation = v4_fam,          entry_yr = v16_fam,          naturalized_bi = v17_fam,          naturalize_yr = v18_fam,          nationality_birth = v14_fam,          income = v8_tab,          weight = v10_tab) df_2018 <- full_join(fam_2018, tab_2018) %>%    full_join(self_2018) %>%    mutate(wave="2018") %>%    rename(hhid = v1,          relation = v4_fam,          entry_yr = v16_fam,          naturalized_bi = v17_fam,          naturalize_yr = v18_fam,          nationality_birth = v12_fam,          income = v36_tab,          weight = v38_tab) df_2021 <- full_join(fam_2021, tab_2021) %>%    full_join(self_2021) %>%    mutate(wave="2021") %>%    rename(hhid = v1,          relation = v3_fam,          entry_yr = v18_fam,          naturalized_bi = v19_fam,          naturalize_yr = v20_fam,          nationality_birth = v13_fam,          income = v8_tab,          weight = v11_tab) ``` ### Number of Naturalized Respondents in the Sample <!-- <details> --> \[ 귀화연도 있음\]  or 2 1 \[ 1. 해당\] ```{r naturalize} #| echo: FALSE #| message: FALSE nat_2015 <- df_2015  %>% filter(relation == 1) %>% count(naturalize_yr) df_2015 %>% filter(relation == 1) %>% count(naturalized_bi)  ##귀화여부 df_2015 <- df_2015 %>%   mutate(years_kr = as.numeric(naturalize_yr) - as.numeric(entry_yr)) yrs_2015 <- df_2015 %>% filter(relation == 1) %>% count(years_kr) yrs_2015_pre <- df_2015 %>% filter(relation == 1) %>% filter(entry_yr < 2011) %>% count(years_kr) yrs_2015_post <- df_2015 %>% filter(relation == 1) %>% filter(entry_yr > 2011) %>% count(years_kr) nat_2018 <- df_2018  %>% filter(relation == 1) %>% count(naturalize_yr) df_2018 %>% filter(relation == 1) %>% count(naturalized_bi)  ##귀화여부 df_2018 <- df_2018 %>%   mutate(years_kr = as.numeric(naturalize_yr) - as.numeric(entry_yr)) yrs_2018 <- df_2018 %>% filter(relation == 1) %>% count(years_kr) yrs_2018_pre <- df_2018 %>% filter(relation == 1) %>% filter(entry_yr < 2011) %>% count(years_kr) yrs_2018_post <- df_2018 %>% filter(relation == 1) %>% filter(entry_yr > 2011) %>% count(years_kr) nat_2021 <- df_2021  %>% filter(relation == 1) %>% count(naturalize_yr) df_2021 %>% filter(relation == 1) %>% count(naturalized_bi)  ##귀화여부 df_2021 <- df_2021 %>%   mutate(years_kr = as.numeric(naturalize_yr) - as.numeric(entry_yr)) yrs_2021 <- df_2021 %>% filter(relation == 1) %>% count(years_kr) yrs_2021_pre <- df_2021 %>% filter(relation == 1) %>% filter(entry_yr < 2011) %>% count(years_kr) yrs_2021_post <- df_2021 %>% filter(relation == 1) %>% filter(entry_yr > 2011) %>% count(years_kr) ``` ```{r filter_n_merge} #| echo: FALSE #| message: FALSE df_slim_2015 <- df_2015 %>%    select(hhid, relation, naturalize_yr, naturalized_bi, nationality_birth, entry_yr, years_kr, income, weight, wave) %>%   mutate(relation = as.numeric(relation),          naturalized_bi = as.numeric(naturalized_bi),          nationality_birth = as.numeric(nationality_birth),          income = as.numeric(income)) df_slim_2018 <- df_2018 %>%    select(hhid, relation, naturalize_yr, naturalized_bi, nationality_birth, entry_yr, years_kr, income, weight, wave) %>%   mutate(relation = as.numeric(relation),          naturalized_bi = as.numeric(naturalized_bi),          nationality_birth = as.numeric(nationality_birth),          income = as.numeric(income)) df_slim_2021 <- df_2021 %>%    select(hhid, relation, naturalize_yr, naturalized_bi, nationality_birth, entry_yr, years_kr, income, weight, wave)  label_info <- tibble(   varname = names(df_slim_2021),   # Get labels using val_labels(), handling NULLs, and convert to tibbles immediately   label_data = lapply(df_slim_2021, function(col) {     # *** Use val_labels() instead of labels() ***     lbls <- labelled::val_labels(col)     if (!is.null(lbls) && length(lbls) > 0) {       # Convert the named vector into a tibble with 'label' and 'code' columns       tibble::enframe(lbls, name = "label", value = "code")     } else {       # Return NULL if no labels, so it can be filtered out       NULL     }   }) ) %>%   # Filter out variables that didn't have labels (where label_data is NULL)   filter(!sapply(label_data, is.null)) %>%   # Now unnest the tibbles contained in the label_data list-column   unnest(label_data) %>%   # Select the desired columns (already correctly named)   select(varname, code, label) df_slim_2021 <- df_slim_2021 %>%   mutate(relation = as.numeric(relation),          naturalized_bi = as.numeric(naturalized_bi),          nationality_birth = as.numeric(nationality_birth),          income = as.numeric(income)) df_slim <- rbind(df_slim_2015, df_slim_2018) %>%    rbind(df_slim_2021) ``` <!-- </details> --> ### Number of Naturalizations by Year <!-- <details> --> <!-- <summary>Figure</summary> --> ```{r plot_naturalize} #| echo: FALSE plot_naturalize <- df_slim %>%   filter(relation == 1, !is.na(naturalize_yr)) %>%   mutate(naturalize_yr = as.numeric(as.character(naturalize_yr))) %>%   filter(!is.na(naturalize_yr)) %>%   group_by(naturalize_yr) %>%   summarize(weighted_n = sum(weight, na.rm = TRUE)) %>%   ggplot(aes(x = naturalize_yr, y = weighted_n)) +   geom_line() +   geom_point() +   labs(     title = "Weighted Number of Naturalizations by Year",     x = "Year of Naturalization",     y = "Weighted Frequency"   ) +   geom_vline(xintercept = c(2010, 2011), linetype = "dashed", color = "red") +   scale_x_continuous(breaks = c(1940, 1960, 1980, 2000, 2010, 2020)) +   xlim(1996,2021) +   theme_minimal() ggplotly(plot_naturalize) #ggsave("plot_naturalize.png", plot = plot_naturalize, width = 8, height = 6, dpi = 300) ``` <!-- </details> --> ### By Allowing Dual Citizenship <!-- <details> --> <!-- <summary>Figure</summary> --> ```{r dual_citizenship_data} #| echo: FALSE #| message: FALSE #The 2011 reform would only affect those marriage migrants whose home countries allow dual citizenship.  Vietnam, Philippines and Thailand do, but not Japan, Mongolia, China. df_dual <- df_slim %>%    filter(nationality_birth %in% c(112, 113, 120, 122, 130, 145, 155, 170, 185)) %>%  # 중국 112, 대만 113, 홍콩 120, 한국계 중국인 122, 일본 130, 몽골 145, 필리핀 155, 태국(타이) 170, 베트남 185   mutate(     dual = ifelse(nationality_birth %in% c(155, 170, 185), 1, 0)   ) ``` ```{r dual_citizenship_figure} #| echo: FALSE #| message: FALSE plot_dual <- df_dual %>%   filter(relation == 1) %>%   filter(!is.na(naturalize_yr)) %>%   mutate(naturalize_yr = as.numeric(as.character(naturalize_yr))) %>%   filter(!is.na(naturalize_yr)) %>%   group_by(dual, naturalize_yr) %>%   summarise(weighted_n = sum(weight), na.rm = TRUE, .groups = 'drop') %>%   mutate(dual = factor(dual, levels = c(0, 1), labels = c("Not Allowed", "Allowed"))) %>%    ggplot(aes(x = naturalize_yr, y = weighted_n, color = dual, group = dual)) +   geom_line() +   geom_point() +   labs(     title = "Number of Naturalizations by Year, by Dual Citizenship Status",     x = "Year of Naturalization",     y = "Weighted Frequency",     color = "Dual Citizenship" # Legend title (adjust if needed)   ) +   geom_vline(xintercept = c(2005, 2010, 2011), linetype = "dashed", color = "red") +   scale_x_continuous(breaks = c(1940, 1960, 1980, 2000, 2005, 2010, 2020)) +   xlim(1996,2021) +   theme_minimal() +   theme(legend.position = "bottom")    plotly_dual <- ggplotly(plot_dual) # 3. Modify the plotly object's layout to move the legend plotly_dual_final <- plotly_dual %>%   layout(legend = list(              orientation = "h",   # "h" for horizontal orientation              xanchor = "center",  # Anchor point of the legend ("auto", "left", "center", "right")              x = 0.5,             # Horizontal position (0 to 1, 0.5 is center)              yanchor = "top",     # Anchor point of the legend ("auto", "top", "middle", "bottom")              y = -0.2             # Vertical position (use negative values to place below plot, adjust distance)           )) # 4. Display the modified plotly object plotly_dual_final ``` <!-- </details> --> ### By Country <!-- <details> --> <!-- <summary>Figure</summary> --> ```{r country_figure} #| echo: FALSE #| message: FALSE plot_country <- df_slim %>%    filter(nationality_birth %in% c(112, 113, 120, 122, 130, 145, 155, 170, 185)) %>%   filter(relation == 1) %>%   filter(!is.na(naturalize_yr)) %>%   mutate(naturalize_yr = as.numeric(as.character(naturalize_yr))) %>%   filter(!is.na(naturalize_yr)) %>%   group_by(nationality_birth, naturalize_yr) %>%   summarise(weighted_n = sum(weight), na.rm = TRUE, .groups = 'drop') %>%   mutate(nationality_birth = factor(nationality_birth)) %>%    ggplot(aes(x = naturalize_yr, y = weighted_n, color = nationality_birth, group = nationality_birth)) +   geom_line() +   geom_point() +   labs(     title = "Number of Naturalizations by Year, by Country",     x = "Year of Naturalization",     y = "Weighted Frequency",     color = "Nationality at Birth"   ) +   geom_vline(xintercept = c(2005, 2010, 2011), linetype = "dashed", color = "red") +   scale_x_continuous(breaks = c(1940, 1960, 1980, 2000, 2005, 2010, 2020)) +   xlim(1996,2021) +   theme_minimal() +   theme(legend.position = "bottom") plotly_country <- ggplotly(plot_country) # 3. Modify the plotly object's layout to move the legend plotly_country_final <- plotly_country %>%   layout(legend = list(     orientation = "h",   # "h" for horizontal orientation     xanchor = "center",  # Anchor point of the legend ("auto", "left", "center", "right")     x = 0.5,             # Horizontal position (0 to 1, 0.5 is center)     yanchor = "top",     # Anchor point of the legend ("auto", "top", "middle", "bottom")     y = -0.2             # Vertical position (use negative values to place below plot, adjust distance)   )) # 4. Display the modified plotly object plotly_country_final ``` <!-- </details> --> ### By Income <!-- <details> --> <!-- <summary>Figure</summary> --> ```{r income_plot} #| echo: FALSE #| message: FALSE plot_income <- df_slim %>%   filter(relation == 1) %>%   filter(!is.na(naturalize_yr)) %>%   mutate(naturalize_yr = as.numeric(as.character(naturalize_yr)),          income_bi = ifelse(income < 3, 0, 1),          income = factor(income)) %>%   filter(!is.na(naturalize_yr)) %>%   group_by(income, naturalize_yr) %>%   summarise(weighted_n = sum(weight), na.rm = TRUE, .groups = 'drop') %>%   ggplot(aes(x = naturalize_yr, y = weighted_n, color = income, group = income)) +   geom_line() +   geom_point() +   labs(     title = "Number of Naturalizations by Year, by Income Groups",     x = "Year of Naturalization",     y = "Weighted Frequency",     color = "Income Groups" # Legend title (adjust if needed)   ) +   geom_vline(xintercept = c(2005, 2010, 2011, 2018), linetype = "dashed", color = "red") +   scale_x_continuous(breaks = c(1940, 1960, 1980, 2000, 2005, 2010, 2018, 2020)) +   xlim(1996,2021) +   theme_minimal() +   theme(legend.position = "bottom")    plotly_income <- ggplotly(plot_income) # 3. Modify the plotly object's layout to move the legend plotly_income_final <- plotly_income %>%   layout(legend = list(              orientation = "h",   # "h" for horizontal orientation              xanchor = "center",  # Anchor point of the legend ("auto", "left", "center", "right")              x = 0.5,             # Horizontal position (0 to 1, 0.5 is center)              yanchor = "top",     # Anchor point of the legend ("auto", "top", "middle", "bottom")              y = -0.2             # Vertical position (use negative values to place below plot, adjust distance)           )) # 4. Display the modified plotly object plotly_income_final ``` <!-- </details> --> ```{r income_table} #| echo: FALSE #| message: FALSE label_info %>%    filter(varname=="income") %>%    mutate(`Monthly Income`=case_when(       code == 1 ~ "$1,000",        code == 2 ~ "$2,000",        code == 3 ~ "$3,000",        code == 4 ~ "$4,000",        code == 5 ~ "$5,000",        code == 6 ~ "$6,000",        code == 7 ~ "$7,000",        code == 8 ~ "$8,000",        code == 9 ~ "over $8,000",        TRUE ~ NA_character_      )) ``` ## Using Government Statistics {.panel-tabset .panel-tabset-closed} ### Number of Naturalized Aliens by Year  <!-- <details> --> <!-- <summary>Figure</summary> --> [ index.go.kr, 지표누리 e-나라지표 국적통계 추이 ](https://www.index.go.kr/unity/potal/main/EachDtlPageDetail.do?idx_cd=1760) .```{r stat_naturalized} #| echo: FALSE #| message: FALSE library(readxl) yearly_naturalize <- read_excel("~/Dropbox/MEA-THL/Multicultural/data/yearly_naturalize.xlsx") %>%    mutate(year = as.numeric(year),          naturalize = as.numeric(naturalize),          recover = as.numeric(recover)) plot_stat_naturalize <- yearly_naturalize %>%     ggplot(aes(x = year, y = naturalize)) +   geom_line() +   geom_point() +   labs(title = "Number of Naturalizations by Year",        x = "Year of Naturalization",        y = "Frequency") +   geom_vline(xintercept = c(2005, 2010, 2011, 2014, 2020), linetype = "dashed", color = "red") +   scale_x_continuous(breaks = c(1940, 1960, 1980, 2000, 2010, 2020)) +   theme_minimal() ggplotly(plot_stat_naturalize) ``` <!-- </details> --> ### Number of Naturalized Aliense by Year by Allowing Dual Citizenship   <!-- <details> --> <!-- <summary>Figure</summary> --> [ data.go.kr, 공공데이터포털 법무부_55(국적) 연도별 국적취득 국적(지역)별 현황 ](https://www.data.go.kr/data/15100047/fileData.do) .```{r stat_dual} #| echo: FALSE #| message: FALSE #| warning: FALSE country_natrualize <- read_csv("~/Dropbox/MEA-THL/Multicultural/data/yearly_country_naturalize.csv", locale = locale(encoding = "CP949")) %>%   rename(     year = 년,     type = 유형,     country = 국적,     frequency = 건수   ) %>%   mutate(     dual = case_when(       country %in% c("베트남", "필리핀", "태국") ~ 1,       country %in% c("중국", "타이완", "한국계 중국인", "홍콩", "일본", "몽골") ~ 0,       TRUE ~ NA_real_     ),   )  plot_dual <- country_natrualize %>%   filter(!is.na(dual)) %>%   group_by(dual, year) %>%    summarise(n = sum(frequency, na.rm = TRUE), .groups = 'drop') %>%    mutate(dual = factor(dual, levels = c(0, 1), labels = c("Not Allowed", "Allowed"))) %>%    ggplot(aes(x = year, y = n, color = dual, group = dual)) +   geom_line() +   geom_point() +   labs(     title = "Number of Naturalizations by Year, by Dual Citizenship Status",     x = "Year of Naturalization",     y = "Frequency",     color = "Dual Citizenship"   ) +   scale_x_continuous(breaks = c(2011, 2014, 2017, 2020)) +   ylim(0, 5800) +   theme_minimal() +   theme(legend.position = "bottom")    plotly_dual <- ggplotly(plot_dual) # 3. Modify the plotly object's layout to move the legend plotly_dual_final <- plotly_dual %>%   layout(legend = list(              orientation = "h",   # "h" for horizontal orientation              xanchor = "center",  # Anchor point of the legend ("auto", "left", "center", "right")              x = 0.5,             # Horizontal position (0 to 1, 0.5 is center)              yanchor = "top",     # Anchor point of the legend ("auto", "top", "middle", "bottom")              y = -0.2             # Vertical position (use negative values to place below plot, adjust distance)           )) # 4. Display the modified plotly object plotly_dual_final ``` <!-- </details> -->