Source code for knowledgespaces.structures.surmise_function

"""
Surmise functions (generalized surmise relations with multiple clauses).

A surmise function σ on a domain Q maps each item q to a family of
subsets of Q called *clauses*. Each clause represents a possible minimal
foundation for the mastery of q. When every item has exactly one clause,
the surmise function reduces to a surmise relation (partial order).

The four axioms of a surmise function (Definition 5.1.2):

(i)   σ(q) ≠ ∅ for all q           — at least one clause per item
(ii)  q ∈ C for all C ∈ σ(q)       — each clause contains its item
(iii) if q' ∈ C ∈ σ(q), then
      ∃ C' ∈ σ(q') with C' ⊆ C     — refinement (transitivity analogue)
(iv)  clauses for q are incomparable — no clause is a subset of another

References:
    Falmagne, J.-C., & Doignon, J.-P. (2011).
    Learning Spaces, Chapter 5 — Surmise Systems. Springer-Verlag.
"""

from __future__ import annotations

from collections.abc import Collection, Mapping
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from knowledgespaces.structures.knowledge_structure import KnowledgeStructure
    from knowledgespaces.structures.relations import SurmiseRelation


[docs] class SurmiseFunction: """A surmise function on a domain of items. Maps each item q to a family of clauses σ(q), where each clause is a frozenset of items representing an alternative foundation for mastering q. Parameters ---------- domain : Collection[str] The set of all items. clauses : Mapping[str, Collection[Collection[str]]] For each item, the collection of its clauses. Each clause is a collection of item names. Raises ------ ValueError If any of the four surmise function axioms is violated, or if clauses reference items outside the domain. Examples -------- >>> sf = SurmiseFunction( ... {"a", "b", "c", "d", "e"}, ... { ... "a": [{"a"}], ... "b": [{"b", "d"}, {"a", "b", "c"}, {"b", "c", "e"}], ... "c": [{"a", "b", "c"}, {"b", "c", "e"}], ... "d": [{"b", "d"}], ... "e": [{"b", "c", "e"}], ... }, ... ) """ __slots__ = ("_clauses", "_domain") def __init__( self, domain: Collection[str], clauses: Mapping[str, Collection[Collection[str]]], ) -> None: self._domain: frozenset[str] = frozenset(domain) # Build internal representation parsed: dict[str, frozenset[frozenset[str]]] = {} for q in self._domain: if q not in clauses: raise ValueError(f"Axiom (i) violated: no clauses provided for item {q!r}") raw = clauses[q] family: set[frozenset[str]] = set() for c in raw: fs = frozenset(c) extra = fs - self._domain if extra: raise ValueError( f"Clause {set(fs)} for item {q!r} contains items " f"outside the domain: {set(extra)}" ) family.add(fs) if not family: raise ValueError(f"Axiom (i) violated: sigma({q!r}) is empty") parsed[q] = frozenset(family) # Validate axiom (ii): q ∈ C for all C ∈ σ(q) for q, family in parsed.items(): for c in family: if q not in c: raise ValueError( f"Axiom (ii) violated: clause {set(c)} for item " f"{q!r} does not contain {q!r}" ) # Validate axiom (iii): refinement for q, family in parsed.items(): for c in family: for qp in c: if not any(cp <= c for cp in parsed[qp]): raise ValueError( f"Axiom (iii) violated: item {qp!r} is in clause " f"{set(c)} for {q!r}, but no clause for {qp!r} " f"is a subset of {set(c)}" ) # Validate axiom (iv): antichain (incomparable clauses) for q, family in parsed.items(): clause_list = list(family) for i, c1 in enumerate(clause_list): for c2 in clause_list[i + 1 :]: if c1 < c2 or c2 < c1: smaller, larger = (c1, c2) if c1 < c2 else (c2, c1) raise ValueError( f"Axiom (iv) violated: clauses for {q!r} are not " f"incomparable — {set(smaller)}{set(larger)}" ) self._clauses: dict[str, frozenset[frozenset[str]]] = parsed # ------------------------------------------------------------------ # Properties # ------------------------------------------------------------------ @property def domain(self) -> frozenset[str]: """The item domain Q.""" return self._domain @property def n_items(self) -> int: """Number of items in the domain.""" return len(self._domain) @property def is_ordinal(self) -> bool: """True if every item has exactly one clause. When ordinal, the surmise function is equivalent to a surmise relation (partial order). """ return all(len(f) == 1 for f in self._clauses.values()) @property def is_discriminative(self) -> bool: """True if distinct items have distinct clause families. σ(q) = σ(q') implies q = q' (Definition 5.1.2). """ seen: list[frozenset[frozenset[str]]] = [] for family in self._clauses.values(): if family in seen: return False seen.append(family) return True @property def is_acyclic(self) -> bool: """True if the relation Rσ is acyclic (Definition 5.6.12). Rσ is defined by: q Rσ q' ⟺ ∃ C ∈ σ(q') : q ∈ C. Acyclicity means no cycle q1 Rσ q2 Rσ ... Rσ q1 with q1 ≠ qk. """ # Build adjacency: successors[q] = {q' : q Rσ q'} successors: dict[str, set[str]] = {q: set() for q in self._domain} for qp, family in self._clauses.items(): for c in family: for q in c: if q != qp: successors[q].add(qp) # DFS cycle detection WHITE, GRAY, BLACK = 0, 1, 2 color: dict[str, int] = {q: WHITE for q in self._domain} def has_cycle(node: str) -> bool: color[node] = GRAY for nb in successors[node]: if color[nb] == GRAY: return True if color[nb] == WHITE and has_cycle(nb): return True color[node] = BLACK return False return not any(has_cycle(q) for q in self._domain if color[q] == WHITE) # ------------------------------------------------------------------ # Query methods # ------------------------------------------------------------------
[docs] def clauses_for(self, item: str) -> frozenset[frozenset[str]]: """Return the clause family σ(item). Parameters ---------- item : str An item in the domain. Returns ------- frozenset[frozenset[str]] The set of clauses for *item*. """ if item not in self._domain: raise ValueError(f"Item {item!r} is not in the domain") return self._clauses[item]
[docs] def precedence_relation(self) -> set[tuple[str, str]]: """The precedence relation: r ≺ q ⟺ r ∈ ∩σ(q). An item r precedes q iff r belongs to *every* clause for q. This is always a quasi order and generalizes the surmise relation (Definition 3.7.1 / §5.6.9). """ pairs: set[tuple[str, str]] = set() for q, family in self._clauses.items(): intersection = frozenset.intersection(*family) for r in intersection: if r != q: pairs.add((r, q)) return pairs
# ------------------------------------------------------------------ # Conversion: σ → Knowledge Space (Eq. 5.2) # ------------------------------------------------------------------
[docs] def to_knowledge_space(self) -> KnowledgeStructure: """Derive the knowledge space from this surmise function. A set K ⊆ Q is a state iff for every q ∈ K, there exists a clause C ∈ σ(q) with C ⊆ K (Eq. 5.2, Definition 5.2.3). The result is always a knowledge space (closed under union). When the surmise function satisfies all four axioms, each clause is an atom and the space is granular. Returns ------- KnowledgeStructure The derived knowledge space. References ---------- Falmagne & Doignon (2011), Theorem 5.2.5. """ from knowledgespaces.structures.knowledge_structure import ( KnowledgeStructure, ) items = sorted(self._domain) n = len(items) # Generate candidate states via union of clauses. # All clauses form the base; states = unions of clauses + ∅. # Collect all clauses first. all_clauses: list[frozenset[str]] = [] for family in self._clauses.values(): for c in family: all_clauses.append(c) # For small domains, enumerate powerset and filter. # For larger domains, build states as unions of clauses. if n <= 20: # Powerset approach (exact, guaranteed complete) from itertools import chain, combinations states: list[frozenset[str]] = [] for subset in chain.from_iterable(combinations(items, r) for r in range(n + 1)): candidate = frozenset(subset) if self._is_state(candidate): states.append(candidate) return KnowledgeStructure(items, states) else: # Union-of-clauses approach for large domains # States are exactly unions of compatible clause sets states_set: set[frozenset[str]] = {frozenset()} # BFS: start from clauses, keep taking unions frontier = set(all_clauses) visited: set[frozenset[str]] = set() while frontier: new_frontier: set[frozenset[str]] = set() for candidate in frontier: if candidate in visited: continue visited.add(candidate) if self._is_state(candidate): states_set.add(candidate) # Try adding each clause for c in all_clauses: union = candidate | c if union not in visited: new_frontier.add(union) frontier = new_frontier return KnowledgeStructure(items, states_set)
def _is_state(self, candidate: frozenset[str]) -> bool: """Check if candidate satisfies the state condition (Eq. 5.2).""" return all(any(c <= candidate for c in self._clauses[q]) for q in candidate) # ------------------------------------------------------------------ # Conversion: σ → SurmiseRelation (ordinal case only) # ------------------------------------------------------------------
[docs] def to_surmise_relation(self) -> SurmiseRelation: """Convert to a SurmiseRelation (only valid for ordinal case). When each item has exactly one clause, the surmise function is equivalent to a quasi order (Definition 5.1.4). Returns ------- SurmiseRelation The equivalent surmise relation. Raises ------ ValueError If the surmise function is not ordinal. """ from knowledgespaces.structures.relations import SurmiseRelation if not self.is_ordinal: raise ValueError( "Cannot convert to SurmiseRelation: surmise function " "has items with multiple clauses (not ordinal)" ) pairs: set[tuple[str, str]] = set() for q, family in self._clauses.items(): (clause,) = family # exactly one for r in clause: if r != q: pairs.add((r, q)) return SurmiseRelation(self._domain, pairs)
# ------------------------------------------------------------------ # Factories # ------------------------------------------------------------------
[docs] @classmethod def from_knowledge_space( cls, ks: KnowledgeStructure, ) -> SurmiseFunction: """Derive the surmise function from a granular knowledge space. For each item q, σ(q) = {atoms at q in K} (Definition 5.2.1). Parameters ---------- ks : KnowledgeStructure Must be a knowledge space (union-closed). The space must be granular (every item has at least one atom). Returns ------- SurmiseFunction Raises ------ ValueError If the structure is not a knowledge space, or if some item has no atom (non-granular). References ---------- Falmagne & Doignon (2011), Definition 5.2.1, Theorem 5.2.5. """ if not ks.is_knowledge_space: raise ValueError( "Cannot derive surmise function: structure is not " "a knowledge space (not closed under union)" ) atoms = ks.atoms() clauses: dict[str, list[frozenset[str]]] = {} for q in ks.domain: item_atoms = atoms.get(q, []) if not item_atoms: raise ValueError(f"Knowledge space is not granular: item {q!r} has no atom") clauses[q] = item_atoms return cls(ks.domain, clauses)
[docs] @classmethod def from_surmise_relation( cls, relation: SurmiseRelation, ) -> SurmiseFunction: """Cast a surmise relation as a surmise function. Each item q gets a single clause: {q} ∪ prerequisites(q) in the transitive closure (Definition 5.1.4). Parameters ---------- relation : SurmiseRelation A surmise relation (partial order on items). Returns ------- SurmiseFunction An ordinal surmise function (one clause per item). """ closure = relation.transitive_closure() clauses: dict[str, list[frozenset[str]]] = {} for q in closure.items: prereqs = closure.prerequisites_of(q) clause = frozenset({q}) | prereqs clauses[q] = [clause] return cls(closure.items, clauses)
# ------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------ def __eq__(self, other: object) -> bool: if not isinstance(other, SurmiseFunction): return NotImplemented return self._domain == other._domain and self._clauses == other._clauses def __repr__(self) -> str: n_clauses = sum(len(f) for f in self._clauses.values()) return f"SurmiseFunction(items={self.n_items}, clauses={n_clauses})"