"""
CSV import/export for KST objects.
Supports three standard CSV formats:
- **Skill map matrix**: rows=items, cols=skills, binary (μ: items→skills).
- **Prerequisite matrix**: rows=labels, cols=labels, binary (surmise relation).
- **Knowledge structure**: state_size, state_id, then binary columns per item.
All CSV files use the first column as row index and the first row as header.
"""
from __future__ import annotations
import csv
from pathlib import Path
from typing import Union
from knowledgespaces.derivation.skill_map import SkillMap
from knowledgespaces.structures.knowledge_structure import KnowledgeStructure
from knowledgespaces.structures.relations import SurmiseRelation
PathLike = Union[str, Path]
def _validate_unique(labels: list[str], context: str) -> None:
    """Ensure no label occurs more than once.

    Labels are scanned in order; the first label found to repeat an
    earlier one is reported.

    Raises
    ------
    ValueError
        If a label appears a second time. The message is prefixed
        with ``context``.
    """
    encountered: set[str] = set()
    for candidate in labels:
        size_before = len(encountered)
        encountered.add(candidate)
        # Adding an already-present element leaves the set size unchanged.
        if len(encountered) == size_before:
            raise ValueError(f"{context}: duplicate label '{candidate}'.")
def _validate_binary_row(
    row: list[str],
    expected_cols: int,
    row_label: str,
    context: str,
) -> None:
    """Check the shape and contents of one CSV data row.

    The first cell of ``row`` is treated as the row label; every
    remaining cell must be the string ``"0"`` or ``"1"``, and there must
    be exactly ``expected_cols`` of them.

    Raises
    ------
    ValueError
        If the number of data cells differs from ``expected_cols``, or
        if a data cell is neither ``"0"`` nor ``"1"``. Column positions
        in the message are zero-based and do not count the label cell.
    """
    # The label cell is not a data column, hence the "- 1".
    actual = len(row) - 1
    if actual != expected_cols:
        raise ValueError(
            f"{context}: row '{row_label}' has {actual} data columns, expected {expected_cols}."
        )

    # Locate the first offending cell, if any.
    first_bad = next(
        ((pos, cell) for pos, cell in enumerate(row[1:]) if cell not in ("0", "1")),
        None,
    )
    if first_bad is None:
        return

    j, val = first_bad
    raise ValueError(
        f"{context}: row '{row_label}', column {j}: value {val!r} is not '0' or '1'."
    )
# ------------------------------------------------------------------
# Skill map
# ------------------------------------------------------------------
def read_skill_map(path: PathLike) -> SkillMap:
    """Read a skill map from CSV.

    Expected format::

        ,skill1,skill2,...
        item1,0,1,...
        item2,1,0,...

    The first header cell is ignored; the remaining header cells are the
    skill labels. Each data row starts with an item label followed by one
    ``0``/``1`` cell per skill; an item requires every skill whose cell is
    ``1``. Blank lines are skipped.

    Raises
    ------
    ValueError
        If the file is empty, the header has no skill columns, skill or
        item labels are duplicated, there are no data rows, or rows have
        wrong column count or non-binary values.
    """
    with open(path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)

        # A bare next() would leak StopIteration on an empty file; report
        # it as the same ValueError used for every other malformed input.
        header = next(reader, None)
        if header is None:
            raise ValueError("Skill map CSV: file is empty.")

        skills = header[1:]
        if not skills:
            raise ValueError("Skill map CSV: header has no skill columns.")
        _validate_unique(skills, "Skill map CSV header")
        n_skills = len(skills)

        items: list[str] = []
        mapping: dict[str, frozenset[str]] = {}
        # Data rows start on line 2 (line 1 is the header).
        for line_no, row in enumerate(reader, start=2):
            if not row:
                continue  # csv.reader yields [] for blank lines
            item = row[0]
            _validate_binary_row(row, n_skills, item, f"Skill map CSV line {line_no}")
            items.append(item)
            mapping[item] = frozenset(
                skill for skill, val in zip(skills, row[1:]) if val == "1"
            )

    if not items:
        raise ValueError("Skill map CSV: no data rows.")
    # Checked after the loop so that the full list of row labels is known.
    _validate_unique(items, "Skill map CSV row labels")
    return SkillMap(items, skills, mapping)
def write_skill_map(skill_map: SkillMap, path: PathLike) -> None:
    """Serialize a skill map as a CSV matrix.

    The header row is an empty cell followed by the skill labels; each
    subsequent row is an item label followed by that item's matrix row,
    with every cell converted via ``str``. The labels and matrix come from
    ``skill_map.to_matrix()``. The file is written as UTF-8.
    """
    row_labels, col_labels, cells = skill_map.to_matrix()
    with open(path, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow([""] + list(col_labels))
        out.writerows(
            [label] + [str(cell) for cell in cells[pos]]
            for pos, label in enumerate(row_labels)
        )
# ------------------------------------------------------------------
# Surmise relation (prerequisite matrix)
# ------------------------------------------------------------------
def read_relation(path: PathLike) -> SurmiseRelation:
    """Read a surmise relation from a CSV prerequisite matrix.

    Expected format::

        ,label1,label2,...
        label1,0,1,...
        label2,0,0,...

    Row labels must match column labels exactly: every header label needs
    exactly one row, though rows may appear in any order. A ``1`` in row
    ``r`` and column ``c`` contributes the pair ``(r, c)`` to the relation.
    Blank lines are skipped.

    Raises
    ------
    ValueError
        If the file is empty, the header has no labels or has duplicate
        labels, rows have wrong column count or non-binary values, a row
        label is duplicated or not in the header, or a header label has
        no row.
    """
    with open(path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)

        # A bare next() would leak StopIteration on an empty file; report
        # it as the same ValueError used for every other malformed input.
        header = next(reader, None)
        if header is None:
            raise ValueError("Prerequisite CSV: file is empty.")

        labels = header[1:]
        if not labels:
            raise ValueError("Prerequisite CSV: header has no label columns.")
        _validate_unique(labels, "Prerequisite CSV header")
        n_labels = len(labels)
        label_set = set(labels)

        # A set keeps the duplicate-row check O(1) per row.
        seen_rows: set[str] = set()
        relations: set[tuple[str, str]] = set()
        # Data rows start on line 2 (line 1 is the header).
        for line_no, row in enumerate(reader, start=2):
            if not row:
                continue  # csv.reader yields [] for blank lines
            row_label = row[0]
            if row_label not in label_set:
                raise ValueError(
                    f"Prerequisite CSV line {line_no}: row label "
                    f"'{row_label}' is not in the header labels {sorted(label_set)}."
                )
            _validate_binary_row(row, n_labels, row_label, f"Prerequisite CSV line {line_no}")
            if row_label in seen_rows:
                raise ValueError(
                    f"Prerequisite CSV line {line_no}: duplicate row label '{row_label}'."
                )
            seen_rows.add(row_label)
            relations.update(
                (row_label, col_label)
                for col_label, val in zip(labels, row[1:])
                if val == "1"
            )

    # Every accepted row label is a header label, so any difference here
    # means some header label never got a row.
    if seen_rows != label_set:
        missing = label_set - seen_rows
        raise ValueError(f"Prerequisite CSV: missing rows for labels: {sorted(missing)}.")
    return SurmiseRelation(labels, relations)
def write_relation(relation: SurmiseRelation, path: PathLike) -> None:
    """Serialize a surmise relation as a CSV prerequisite matrix.

    The header row is an empty cell followed by the labels; each later
    row is a label followed by its matrix row, with every cell converted
    via ``str``. Labels and matrix come from ``relation.to_matrix()``.
    The file is written as UTF-8.
    """
    labels, grid = relation.to_matrix()
    with open(path, "w", newline="", encoding="utf-8") as sink:
        emit = csv.writer(sink).writerow
        emit(["", *labels])
        for pos, label in enumerate(labels):
            cells = [str(bit) for bit in grid[pos]]
            emit([label] + cells)
# ------------------------------------------------------------------
# Knowledge structure (state matrix)
# ------------------------------------------------------------------
def read_structure(path: PathLike) -> KnowledgeStructure:
    """Read a knowledge structure from CSV.

    Expected format::

        state_size,state_id,item1,item2,...
        0,0,0,0,...
        1,1,1,0,...

    Header cells from the third onward are the item labels. For each data
    row, the state is the set of items whose cell is ``1``. The first two
    cells of every row (``state_size``, ``state_id``) are skipped: they are
    neither validated nor used. Blank lines are skipped.

    Raises
    ------
    ValueError
        If the file is empty, the header has fewer than three columns,
        item labels are duplicated, or rows have wrong column count or
        non-binary item values.
    """
    with open(path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)

        # A bare next() would leak StopIteration on an empty file; report
        # it as the same ValueError used for every other malformed input.
        header = next(reader, None)
        if header is None:
            raise ValueError("Structure CSV: file is empty.")
        if len(header) < 3:
            raise ValueError(
                "Structure CSV: header must have at least "
                "state_size, state_id, and one item column."
            )

        items = header[2:]
        _validate_unique(items, "Structure CSV header items")
        n_items = len(items)

        states: list[frozenset[str]] = []
        # Data rows start on line 2 (line 1 is the header).
        for line_no, row in enumerate(reader, start=2):
            if not row:
                continue  # csv.reader yields [] for blank lines
            actual_data = len(row) - 2  # skip state_size, state_id
            if actual_data != n_items:
                raise ValueError(
                    f"Structure CSV line {line_no}: {actual_data} item columns, expected {n_items}."
                )
            cells = row[2:]
            for item, val in zip(items, cells):
                if val not in ("0", "1"):
                    raise ValueError(
                        f"Structure CSV line {line_no}, item '{item}': "
                        f"value {val!r} is not '0' or '1'."
                    )
            states.append(frozenset(item for item, val in zip(items, cells) if val == "1"))

    return KnowledgeStructure(items, states)
def write_structure(structure: KnowledgeStructure, path: PathLike) -> None:
    """Serialize a knowledge structure as a CSV state matrix.

    The header is ``state_size``, ``state_id``, then the domain items in
    sorted order. States are written ordered by size and then by their
    sorted item lists; ``state_id`` is the position in that ordering,
    starting at 0. Each item cell is ``1`` if the item is in the state,
    otherwise ``0``. The file is written as UTF-8.
    """
    columns = sorted(structure.domain)

    def _ordering(state):
        # Smaller states first; ties broken by the sorted item list.
        return (len(state), sorted(state))

    ordered_states = sorted(structure.states, key=_ordering)

    with open(path, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(["state_size", "state_id"] + columns)
        for state_id, state in enumerate(ordered_states):
            membership = ["1" if column in state else "0" for column in columns]
            out.writerow([str(len(state)), str(state_id)] + membership)