Skip to contents

The krpoltext package provides access to two Korean political text corpora described in:

Lim, T.H. (2025). South Korean Election Campaign Booklet and Party Statements Corpora. Scientific Data, 12, 1030. https://doi.org/10.1038/s41597-025-05220-4

Installation

# install.packages("remotes")
remotes::install_github("taehyun-lim/krpoltext")

Load a Dataset

Data is automatically downloaded from OSF on first use and cached locally as an RDS file.

library(krpoltext)

# Load the party statements corpus (~741 MB download on first use)
ps <- load_party_statements()
ps

Explore Metadata

meta <- metadata("party_statements")
meta$name
meta$time_coverage
meta$n_candidates_or_entries
meta$columns

Filter Documents

get_docs() filters on any column that exists in the dataset; pass each filter as a column = value argument.

# Statements from 2020
docs_2020 <- get_docs("party_statements", year = 2020, .data = ps)
nrow(docs_2020)

# Conservative party statements from 2018-2022
conservative_recent <- get_docs(
  "party_statements",
  year = 2018:2022,
  conservative = 1,
  .data = ps
)
nrow(conservative_recent)

Quick Summary

table(ps$year)
table(ps$partisan)

Campaign Booklets

cb <- load_campaign_booklet()

# National Assembly candidates only
assembly <- get_docs("campaign_booklet", office = "national_assembly", .data = cb)
nrow(assembly)

# Count documents by party
table(assembly$party_eng)

Next Steps