{
  "dataset": "campaign_booklet",
  "name": "South Korean Election Campaign Booklets",
  "description": "Enriched campaign booklet artifact using the same document-row universe as the original CSV source, with conservative NEC linkage fields such as 'huboid', 'sg_id', and 'sg_typecode' added to improve interoperability with kr-elections-mcp and related NEC-aligned workflows.",
  "time_coverage": "2000-2022",
  "data_version": "v2022",
  "package_version": "0.2.0",
  "variant": "enriched",
  "default_variant": "original",
  "available_variants": ["original", "enriched"],
  "variant_description": "The same document-row universe as the original CSV source, plus conservative NEC linkage fields for integration workflows.",
  "recommended_use": "NEC-aligned workflows, kr-elections-mcp, and linkage-aware joins.",
  "identifier_columns": "code",
  "text_columns": ["text", "filtered"],
  "supported_formats": ["csv", "parquet"],
  "managed_formats": ["csv", "parquet"],
  "artifacts": {
    "csv": {
      "format": "csv",
      "file": "sk_election_campaign_booklet_enriched_v2022.csv",
      "download_url": "https://osf.io/download/69e3eec5352dbdd881fd8d7b/",
      "sha256": "08779d4c27a02635c7bf08a332170ac0a5bf1295e825e3b29061c62f95598586",
      "size_bytes": 760045361,
      "managed": true
    },
    "parquet": {
      "format": "parquet",
      "file": "sk_election_campaign_booklet_enriched_v2022.parquet",
      "download_url": "https://osf.io/download/69e3ee72a0e06b0928fd8ae2/",
      "sha256": "d8901cd2cebef30116f8865847727bb10855478ee556bc0dcfb5a04e838ad8f4",
      "size_bytes": 406231949,
      "managed": true
    }
  },
  "columns": [
    {
      "name": "date",
      "type": "character",
      "description": "Election date (YYYY-MM-DD)"
    },
    {
      "name": "name",
      "type": "character",
      "description": "Candidate name (Korean)"
    },
    {
      "name": "region",
      "type": "character",
      "description": "Metropolitan region (province or metropolitan city)"
    },
    {
      "name": "district",
      "type": "character",
      "description": "Electoral district"
    },
    {
      "name": "office_id",
      "type": "integer",
      "description": "Office type identifier (1=president, 2=national_assembly, 3=edu_superintendent, 4=metro_head, 5=metro_assembly, 6=basic_head, 7=basic_assembly)"
    },
    {
      "name": "office",
      "type": "character",
      "description": "Office type label (president, national_assembly, edu_superintendent, metro_head, metro_assembly, basic_head, basic_assembly)"
    },
    {
      "name": "giho",
      "type": "integer",
      "description": "Candidate ballot number"
    },
    {
      "name": "party",
      "type": "character",
      "description": "Political party name (Korean)"
    },
    {
      "name": "party_eng",
      "type": "character",
      "description": "Political party name (English); transliteration if no official English name"
    },
    {
      "name": "result",
      "type": "character",
      "description": "Election result in Korean"
    },
    {
      "name": "sex",
      "type": "character",
      "description": "Sex in Korean"
    },
    {
      "name": "birthday",
      "type": "character",
      "description": "Date of birth (YYYY-MM-DD)"
    },
    {
      "name": "age",
      "type": "integer",
      "description": "Age at the time of the election"
    },
    {
      "name": "job_id",
      "type": "integer",
      "description": "Original NEC job category identifier (varies across years)"
    },
    {
      "name": "job",
      "type": "character",
      "description": "Standardized job category (Korean)"
    },
    {
      "name": "job_name",
      "type": "character",
      "description": "Job title (Korean)"
    },
    {
      "name": "job_name_eng",
      "type": "character",
      "description": "Job title (English)"
    },
    {
      "name": "job_code",
      "type": "integer",
      "description": "Standardized job code consistent across years"
    },
    {
      "name": "edu_id",
      "type": "integer",
      "description": "Original NEC education level identifier (varies across years)"
    },
    {
      "name": "edu",
      "type": "character",
      "description": "Education description (Korean, free-text from NEC)"
    },
    {
      "name": "edu_name",
      "type": "character",
      "description": "Standardized education level label (Korean)"
    },
    {
      "name": "edu_name_eng",
      "type": "character",
      "description": "Standardized education level label (English)"
    },
    {
      "name": "edu_code",
      "type": "integer",
      "description": "Standardized education code consistent across years"
    },
    {
      "name": "career1",
      "type": "character",
      "description": "Career description 1"
    },
    {
      "name": "career2",
      "type": "character",
      "description": "Career description 2"
    },
    {
      "name": "pages",
      "type": "integer",
      "description": "Number of pages in the booklet"
    },
    {
      "name": "code",
      "type": "character",
      "description": "krpoltext document row identifier",
      "identifier": true
    },
    {
      "name": "huboid",
      "type": "character",
      "description": "Linked NEC candidate identifier used for conservative kr-elections-mcp alignment; unresolved rows remain NA"
    },
    {
      "name": "sg_id",
      "type": "character",
      "description": "Linked NEC election identifier used for NEC-aligned workflows"
    },
    {
      "name": "sg_typecode",
      "type": "character",
      "description": "Linked NEC election type identifier used for NEC-aligned workflows"
    },
    {
      "name": "link_status",
      "type": "character",
      "description": "Linkage status for NEC alignment (resolved, ambiguous, not_found, rejected)"
    },
    {
      "name": "matcher_version",
      "type": "character",
      "description": "Version of the linkage pipeline used to assign NEC fields"
    },
    {
      "name": "nec_snapshot_id",
      "type": "character",
      "description": "Identifier of the NEC snapshot used to assign NEC fields"
    },
    {
      "name": "sex_code",
      "type": "integer",
      "description": "Sex code: 1 = male, 0 = female"
    },
    {
      "name": "result_code",
      "type": "integer",
      "description": "Result code: 1 = elected, 0 = not elected"
    },
    {
      "name": "text",
      "type": "character",
      "description": "Full OCR-extracted text of the campaign booklet"
    },
    {
      "name": "filtered",
      "type": "character",
      "description": "Parsed text after morphological analysis; Korean-only, numbers, foreign characters, and symbols removed"
    }
  ],
  "notes": {
    "missing_values": "2,283 rows have no booklet code or text because a booklet was not available. 151 are missing biographical information. 23 booklets were unprocessable.",
    "text_processing": "All text is UTF-8 encoded Korean. 'text' contains the full original text; 'filtered' contains the morphologically parsed version.",
    "identifiers": "'code' is the krpoltext document row identifier, but some rows have missing code values, so row identity should not be inferred from code alone. 'huboid' is a linked NEC identifier, not a native krpoltext identifier. Rows with 'link_status == \"resolved\"' are expected to have a non-null 'huboid'. 'sg_id' and 'sg_typecode' describe the NEC-aligned election scope attached to the row. 'job_id' and 'edu_id' vary across election years; use 'job_code' and 'edu_code' for cross-year analysis.",
    "provenance": "The enriched variant is a row-preserving transformation of the original campaign_booklet CSV source. It adds conservative NEC linkage metadata to improve interoperability with kr-elections-mcp and related NEC-aligned workflows.",
    "artifact_transition": "When the enriched campaign_booklet artifact is rebuilt or republished, update registry checksums, sizes, and URLs in lockstep with this schema."
  },
  "extras": {
    "office_mapping": [
      {
        "office_id": 1,
        "office": "president",
        "description": "Presidential election"
      },
      {
        "office_id": 2,
        "office": "national_assembly",
        "description": "National Assembly election"
      },
      {
        "office_id": 3,
        "office": "edu_superintendent",
        "description": "Education superintendent"
      },
      {
        "office_id": 4,
        "office": "metro_head",
        "description": "Metropolitan city mayor / provincial governor"
      },
      {
        "office_id": 5,
        "office": "metro_assembly",
        "description": "Metropolitan assembly member"
      },
      {
        "office_id": 6,
        "office": "basic_head",
        "description": "Basic local government head"
      },
      {
        "office_id": 7,
        "office": "basic_assembly",
        "description": "Basic assembly member"
      }
    ],
    "row_universe": "Same document-row universe as the original campaign_booklet CSV source; some rows have missing code values.",
    "linkage_fields": [
      {
        "name": "huboid",
        "role": "linked_nec_candidate_identifier",
        "nullable": true
      },
      {
        "name": "sg_id",
        "role": "linked_nec_election_identifier",
        "nullable": true
      },
      {
        "name": "sg_typecode",
        "role": "linked_nec_election_type_identifier",
        "nullable": true
      },
      {
        "name": "link_status",
        "role": "linkage_status",
        "nullable": false
      },
      {
        "name": "matcher_version",
        "role": "linkage_provenance",
        "nullable": true
      },
      {
        "name": "nec_snapshot_id",
        "role": "linkage_provenance",
        "nullable": true
      }
    ]
  }
}
