Source code for knowledgespaces.structures.knowledge_structure

"""
Knowledge structures, spaces, and learning spaces.

A knowledge structure on a domain Q is a family K of subsets of Q
(called knowledge states) that contains at least the empty set and Q itself.

Special cases:
- Knowledge space: closed under set union.
- Closure space: closed under set intersection.
- Learning space: a well-graded knowledge space (equivalently, an antimatroid).

References:
    Doignon, J.-P., & Falmagne, J.-C. (1999).
    Knowledge Spaces. Springer-Verlag.

    Falmagne, J.-C., & Doignon, J.-P. (2011).
    Learning Spaces. Springer-Verlag.
"""

from __future__ import annotations

from collections.abc import Collection, Iterator
from itertools import chain, combinations
from typing import TYPE_CHECKING

from knowledgespaces._limits import DEFAULT_MAX_N_ITEMS, check_domain_size
from knowledgespaces.structures.relations import SurmiseRelation

if TYPE_CHECKING:
    from knowledgespaces.structures.surmise_function import SurmiseFunction



[docs]
class KnowledgeStructure:
    """A family of knowledge states over a domain of items.

    Parameters
    ----------
    domain : Collection[str]
        The set of all items. Must be non-empty: the knowledge-structure
        axioms (Falmagne & Doignon 2011, Def. 1.1.1) require a nonempty
        domain Q, and every downstream algorithm in this package (BLIM
        EM, QUERY, assessment) is undefined for ``|Q| = 0``. An empty
        domain raises ``ValueError``.
    states : Collection[Collection[str]]
        The knowledge states (subsets of domain). The empty set and
        the full domain are added automatically if missing.
    """

    __slots__ = ("_domain", "_states")

    def __init__(
        self,
        domain: Collection[str],
        states: Collection[Collection[str]],
    ) -> None:
        self._domain: frozenset[str] = frozenset(domain)
        if len(self._domain) == 0:
            raise ValueError(
                "KnowledgeStructure requires a non-empty domain. "
                "The axioms (Falmagne & Doignon 2011, Def. 1.1.1) are "
                "defined on a nonempty set Q, and all downstream "
                "algorithms (BLIM EM, QUERY, assessment) degenerate "
                "silently on |Q|=0."
            )
        raw: set[frozenset[str]] = set()
        for s in states:
            fs = frozenset(s)
            if not fs.issubset(self._domain):
                extra = fs - self._domain
                raise ValueError(f"State {set(fs)} contains items outside the domain: {set(extra)}")
            raw.add(fs)
        # Ensure ∅ and Q are present (axioms of a knowledge structure)
        raw.add(frozenset())
        raw.add(self._domain)
        self._states: frozenset[frozenset[str]] = frozenset(raw)

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def domain(self) -> frozenset[str]:
        return self._domain

    @property
    def states(self) -> frozenset[frozenset[str]]:
        return self._states

    @property
    def n_items(self) -> int:
        return len(self._domain)

    @property
    def n_states(self) -> int:
        return len(self._states)

    # ------------------------------------------------------------------
    # Structural properties
    # ------------------------------------------------------------------

    @property
    def is_knowledge_space(self) -> bool:
        """True if closed under union."""
        for s1 in self._states:
            for s2 in self._states:
                if (s1 | s2) not in self._states:
                    return False
        return True

    @property
    def is_closure_space(self) -> bool:
        """True if closed under intersection."""
        for s1 in self._states:
            for s2 in self._states:
                if (s1 & s2) not in self._states:
                    return False
        return True

    @property
    def is_well_graded(self) -> bool:
        """True if every non-empty state has an immediate predecessor.

        A state K has an immediate predecessor if there exists q in K
        such that K \\ {q} is also a state.

        .. warning::

           This check verifies only the **local** well-gradedness
           condition (every state has at least one single-item
           predecessor). It is equivalent to the full definition
           (Falmagne & Doignon, 2011, Def. 1.44 / Thm 1.49) **only if**
           :attr:`is_knowledge_space` is ``True``. For bare
           :class:`KnowledgeStructure` instances that are not
           union-closed, the result may be a **false positive** with
           respect to the textbook definition, which additionally
           requires a tight path between every pair of comparable
           states. Prefer :attr:`is_learning_space`, which enforces
           both conditions, whenever well-gradedness is needed as a
           standalone guarantee.
        """
        for state in self._states:
            if not state:
                continue
            if not any(state - {q} in self._states for q in state):
                return False
        return True

    @property
    def is_accessible(self) -> bool:
        """True if every state is reachable from the empty set.

        Reachability means there exists a chain from ∅ to the state
        where each step adds exactly one item.
        """
        accessible: set[frozenset[str]] = {frozenset()}
        changed = True
        while changed:
            changed = False
            for state in self._states:
                if state in accessible:
                    continue
                for q in state:
                    if state - {q} in accessible:
                        accessible.add(state)
                        changed = True
                        break
        return accessible == self._states

    @property
    def is_learning_space(self) -> bool:
        """True if this is a well-graded knowledge space.

        Equivalent to: knowledge space + well-graded.
        Equivalent to: no hanging states + ∅ and Q present.
        """
        return self.is_knowledge_space and self.is_well_graded

    # ------------------------------------------------------------------
    # Fringe operations
    # ------------------------------------------------------------------


[docs]
    def inner_fringe(self, state: frozenset[str]) -> frozenset[str]:
        """Items whose removal yields another valid state.

        These represent the 'most recently consolidated' items in the state.
        """
        return frozenset(q for q in state if state - {q} in self._states)



[docs]
    def outer_fringe(self, state: frozenset[str]) -> frozenset[str]:
        """Items whose addition yields another valid state.

        These represent the 'next learnable' items from this state.
        """
        return frozenset(q for q in self._domain - state if state | {q} in self._states)



[docs]
    def hanging_states(self) -> list[frozenset[str]]:
        """Non-empty states with empty inner fringe.

        A hanging state cannot be reached incrementally — it indicates
        a structural problem in the learning space.
        """
        return [s for s in self._states if s and not self.inner_fringe(s)]


    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------


[docs]
    def atoms(self) -> dict[str, list[frozenset[str]]]:
        """Atoms of the knowledge structure.

        An atom at item q is a **minimal** state containing q — i.e. a
        state K such that q ∈ K and no proper subset of K that is also
        a state contains q. An item can have **more than one** atom.

        In a closure space (closed under intersection) each item has
        exactly one atom (the intersection of all states containing it).
        In a knowledge space (closed under union) the collection of all
        atoms equals the base.

        Returns
        -------
        dict[str, list[frozenset[str]]]
            Mapping from each item to the list of its atoms (minimal
            states containing that item), sorted by size then content.

        References
        ----------
        Doignon & Falmagne (1999), *Knowledge Spaces*, Def. 1.23.
        Falmagne & Doignon (2011), *Learning Spaces*, Chapter 1.
        """
        result: dict[str, list[frozenset[str]]] = {}
        for q in self._domain:
            states_with_q = [s for s in self._states if q in s]
            if not states_with_q:
                continue
            # Keep only minimal states (no proper subset also contains q)
            minimal: list[frozenset[str]] = []
            for candidate in states_with_q:
                if not any(other < candidate for other in states_with_q):
                    minimal.append(candidate)
            result[q] = sorted(minimal, key=lambda s: (len(s), sorted(s)))
        return result



[docs]
    def base(self) -> list[frozenset[str]]:
        """Minimal generating family under union.

        The base B of a knowledge space K is the smallest family such
        that K equals the closure of B under union (plus ∅).
        A state K is in the base iff it cannot be expressed as the
        union of states in K that are strictly contained in K.

        Only meaningful for knowledge spaces (closed under union).
        """
        result: list[frozenset[str]] = []
        for state in self._states:
            if not state:
                continue
            # Check if state = union of proper substates
            proper_substates = [s for s in self._states if s < state]
            union_of_substates: frozenset[str] = frozenset()
            for s in proper_substates:
                union_of_substates = union_of_substates | s
            if union_of_substates != state:
                result.append(state)
        return sorted(result, key=lambda s: (len(s), sorted(s)))



[docs]
    def states_by_size(self) -> dict[int, int]:
        """Distribution of states by cardinality."""
        dist: dict[int, int] = {}
        for state in self._states:
            n = len(state)
            dist[n] = dist.get(n, 0) + 1
        return dict(sorted(dist.items()))



[docs]
    def learning_paths(
        self, target: frozenset[str] | None = None, max_paths: int = 100
    ) -> list[list[frozenset[str]]]:
        """Find learning paths from ∅ to target.

        A learning path is a maximal chain where each step adds one item.

        Parameters
        ----------
        target : frozenset[str] or None
            Target state. Defaults to the full domain.
        max_paths : int
            Maximum number of paths to return (to avoid combinatorial explosion).
        """
        if target is None:
            target = self._domain
        target = frozenset(target)

        paths: list[list[frozenset[str]]] = []

        def _search(current: frozenset[str], path: list[frozenset[str]]) -> None:
            if len(paths) >= max_paths:
                return
            if current == target:
                paths.append(list(path))
                return
            for q in target - current:
                successor = current | {q}
                if successor in self._states:
                    path.append(successor)
                    _search(successor, path)
                    path.pop()

        _search(frozenset(), [frozenset()])
        return paths


    # ------------------------------------------------------------------
    # Factories
    # ------------------------------------------------------------------


[docs]
    @classmethod
    def from_surmise_relation(
        cls,
        relation: SurmiseRelation,
        *,
        max_items: int | None = None,
    ) -> KnowledgeStructure:
        """Build the ordinal knowledge structure from a surmise relation.

        A state is valid iff for every item in the state, all its
        prerequisites are also in the state (downward closure).

        The resulting structure is always closed under both union
        and intersection (it is a distributive lattice).

        This enumerates 2^|Q| subsets of the domain; `max_items`
        overrides the default hard limit for advanced callers.
        """
        closure = relation.transitive_closure()
        items = sorted(closure.items)

        check_domain_size(
            len(items),
            max_threshold=max_items if max_items is not None else DEFAULT_MAX_N_ITEMS,
            context="from_surmise_relation",
        )

        states: list[frozenset[str]] = []
        for subset in _powerset(items):
            state = frozenset(subset)
            valid = True
            for item in state:
                if not closure.prerequisites_of(item).issubset(state):
                    valid = False
                    break
            if valid:
                states.append(state)

        return cls(items, states)



[docs]
    @classmethod
    def from_states(cls, states: Collection[Collection[str]]) -> KnowledgeStructure:
        """Build from an explicit collection of states.

        The domain is inferred as the union of all states.
        """
        all_items: set[str] = set()
        for s in states:
            all_items.update(s)
        return cls(all_items, states)


    # ------------------------------------------------------------------
    # Set operations
    # ------------------------------------------------------------------


[docs]
    def intersection_with(self, other: KnowledgeStructure) -> KnowledgeStructure:
        """Intersection of two structures: states present in both."""
        if self._domain != other._domain:
            raise ValueError("Domains must match for intersection.")
        common = self._states & other._states
        return KnowledgeStructure(self._domain, common)



[docs]
    def projection(self, sub_domain: Collection[str]) -> KnowledgeStructure:
        """Project (restrict) the structure to a subset of items.

        For each state K, the projected state is K ∩ sub_domain.
        Only items present in the original domain are retained.

        Raises
        ------
        ValueError
            If sub_domain contains items not in the original domain.
        """
        sub = frozenset(sub_domain)
        extra = sub - self._domain
        if extra:
            raise ValueError(f"sub_domain contains items outside the domain: {set(extra)}")
        projected = {s & sub for s in self._states}
        return KnowledgeStructure(sub, projected)


    # ------------------------------------------------------------------
    # Surmise relation extraction
    # ------------------------------------------------------------------


[docs]
    def surmise_function(self) -> SurmiseFunction:
        """Derive the surmise function from this knowledge space.

        For each item q, σ(q) is the set of atoms at q (minimal states
        containing q). Requires a knowledge space (union-closed) where
        every item has at least one atom (granularity).

        Returns
        -------
        SurmiseFunction

        Raises
        ------
        ValueError
            If the structure is not a granular knowledge space.

        References
        ----------
        Falmagne & Doignon (2011), Definition 5.2.1, Theorem 5.2.5.
        """
        from knowledgespaces.structures.surmise_function import SurmiseFunction

        return SurmiseFunction.from_knowledge_space(self)



[docs]
    def surmise_relation(self) -> SurmiseRelation:
        """Extract the surmise relation implied by this structure.

        Item a is a prerequisite of b iff every state containing b
        also contains a.
        """
        items = sorted(self._domain)
        relations: set[tuple[str, str]] = set()
        for a in items:
            for b in items:
                if a == b:
                    continue
                if all(a in s for s in self._states if b in s):
                    relations.add((a, b))
        return SurmiseRelation(items, relations)


    # ------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------

    def __contains__(self, state: Collection[str]) -> bool:
        return frozenset(state) in self._states

    def __len__(self) -> int:
        return len(self._states)

    def __iter__(self) -> Iterator[frozenset[str]]:
        return iter(sorted(self._states, key=lambda s: (len(s), sorted(s))))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, KnowledgeStructure):
            return NotImplemented
        return self._domain == other._domain and self._states == other._states

    def __repr__(self) -> str:
        return f"KnowledgeStructure(items={self.n_items}, states={self.n_states})"



# ------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------


def _powerset(items: list[str]) -> Iterator[tuple[str, ...]]:
    """Generate all subsets of items."""
    return chain.from_iterable(combinations(items, r) for r in range(len(items) + 1))