Source code for knowledgespaces.assessment.blim

"""
Basic Local Independence Model (BLIM) and Bayesian state inference.

The BLIM defines the probability of a response pattern given a
knowledge state, using two parameters:
- beta (slip): P(incorrect | item mastered)
- eta (guess): P(correct | item not mastered)

The model assumes local independence: responses to different items
are conditionally independent given the knowledge state.

StatePosterior maintains the Bayesian probability distribution
over knowledge states and supports sequential updating.

References:
    Doignon, J.-P., & Falmagne, J.-C. (1999).
    Knowledge Spaces, Chapter 12. Springer-Verlag.

    Falmagne, J.-C., & Doignon, J.-P. (2011).
    Learning Spaces, Chapter 12. Springer-Verlag.
"""

from __future__ import annotations

from dataclasses import dataclass

import numpy as np

from knowledgespaces.structures.knowledge_structure import KnowledgeStructure


@dataclass(frozen=True)
class BLIMParams:
    """Slip and guess probabilities for the Basic Local Independence Model.

    Parameters
    ----------
    beta : float or dict[str, float]
        Slip probability: P(incorrect response | item mastered).
        Either one scalar shared by every item, or a dict giving each
        item its own slip probability. Values must be in [0, 1).
    eta : float or dict[str, float]
        Guess probability: P(correct response | item not mastered).
        Either one scalar shared by every item, or a dict giving each
        item its own guess probability. Values must be in [0, 1).

    Raises
    ------
    ValueError
        If any value lies outside [0, 1), or if both parameters are
        scalars and beta + eta >= 1.

    Notes
    -----
    The constraint beta + eta < 1 (per item, when dicts are used)
    ensures model identifiability: a mastered item must be more likely
    to get a correct response than an unmastered one.
    """

    beta: float | dict[str, float]
    eta: float | dict[str, float]

    def __post_init__(self) -> None:
        # Range-check each parameter on its own, beta first, then eta.
        for label in ("beta", "eta"):
            _validate_param(getattr(self, label), label)

        # The joint constraint needs the item domain when dicts are
        # involved, so that case is left to BLIM.__init__; only the
        # scalar/scalar combination can be decided here.
        both_scalar = isinstance(self.beta, (int, float)) and isinstance(
            self.eta, (int, float)
        )
        if not both_scalar:
            return

        total = self.beta + self.eta
        if total >= 1:
            raise ValueError(
                f"beta + eta must be < 1 for identifiability, "
                f"got {self.beta} + {self.eta} = {total}"
            )
def _validate_param(value: float | dict[str, float], name: str) -> None: """Validate a scalar or per-item BLIM parameter.""" if isinstance(value, dict): for item, v in value.items(): if not (0 <= v < 1): raise ValueError(f"{name}['{item}'] must be in [0, 1), got {v}") else: if not (0 <= value < 1): raise ValueError(f"{name} must be in [0, 1), got {value}") def _resolve_param( value: float | dict[str, float], domain: frozenset[str], name: str ) -> dict[str, float]: """Expand a scalar or dict param into a per-item dict.""" if isinstance(value, dict): missing = domain - set(value) if missing: raise ValueError(f"{name} dict is missing items: {missing}") return {item: value[item] for item in domain} return {item: float(value) for item in domain}
class BLIM:
    """Basic Local Independence Model over a knowledge structure.

    Parameters
    ----------
    structure : KnowledgeStructure
        The knowledge structure defining valid states.
    params : BLIMParams
        Slip and guess parameters (scalar or per-item dict).

    Raises
    ------
    ValueError
        If a per-item parameter dict lacks a domain item, or if
        beta + eta >= 1 for some item.
    """

    __slots__ = ("_beta", "_eta", "_params", "_states_list", "_structure")

    def __init__(self, structure: KnowledgeStructure, params: BLIMParams) -> None:
        self._structure = structure
        self._params = params

        # Fix one state ordering: by size, then by sorted item names.
        ordered: list[frozenset[str]] = list(structure.states)
        ordered.sort(key=lambda state: (len(state), sorted(state)))
        self._states_list: list[frozenset[str]] = ordered

        # Expand scalar/dict parameters into per-item lookups.
        domain = structure.domain
        self._beta: dict[str, float] = _resolve_param(params.beta, domain, "beta")
        self._eta: dict[str, float] = _resolve_param(params.eta, domain, "eta")

        # The identifiability constraint has to hold for every item.
        for item in domain:
            b = self._beta[item]
            e = self._eta[item]
            if b + e >= 1:
                raise ValueError(
                    f"beta + eta must be < 1 for identifiability on item "
                    f"'{item}', got {b} + {e} = {b + e}"
                )

    @property
    def structure(self) -> KnowledgeStructure:
        """The knowledge structure this model is defined on."""
        return self._structure

    @property
    def params(self) -> BLIMParams:
        """The parameters object passed at construction."""
        return self._params

    @property
    def n_states(self) -> int:
        """Number of knowledge states."""
        return len(self._states_list)

    @property
    def states(self) -> list[frozenset[str]]:
        """A fresh list of the states, in the model's fixed order."""
        return self._states_list.copy()

    def _validate_item(self, item: str) -> None:
        """Raise ValueError unless ``item`` belongs to the domain."""
        if item in self._structure.domain:
            return
        raise ValueError(f"Item '{item}' is not in the structure's domain.")

    def likelihood(
        self,
        item: str,
        response: bool,
        state: frozenset[str],
    ) -> float:
        """Compute P(response | state) for a single item and state.

        Parameters
        ----------
        item : str
            The item being assessed.
        response : bool
            True for correct, False for incorrect.
        state : frozenset[str]
            The knowledge state. Must be a state in the structure.

        Returns
        -------
        float
            ``1 - beta`` / ``beta`` for a correct / incorrect response
            when the item is in the state, ``eta`` / ``1 - eta``
            otherwise.

        Raises
        ------
        ValueError
            If item is not in the domain or state is not in the structure.
        """
        self._validate_item(item)
        if state not in self._structure.states:
            raise ValueError(f"State {set(state)} is not in the structure.")

        slip, guess = self._beta[item], self._eta[item]
        if item in state:
            # Mastered: correct unless the learner slips.
            return (1 - slip) if response else slip
        # Not mastered: correct only through a lucky guess.
        return guess if response else (1 - guess)

    def likelihood_vector(self, item: str, response: bool) -> np.ndarray:
        """Compute P(response | state) for all states.

        Returns a 1D array of length n_states, where element i is
        P(response | states[i]).

        Raises
        ------
        ValueError
            If item is not in the domain.
        """
        self._validate_item(item)

        # Only two distinct values can occur, so compute them once. The
        # per-state membership check of likelihood() is unnecessary here
        # because the states come from the model's own list.
        slip, guess = self._beta[item], self._eta[item]
        if response:
            if_mastered, if_not_mastered = 1 - slip, guess
        else:
            if_mastered, if_not_mastered = slip, 1 - guess

        return np.array(
            [if_mastered if item in state else if_not_mastered for state in self._states_list],
            dtype=float,
        )
class StatePosterior:
    """Bayesian probability distribution over knowledge states.

    Immutable: update() returns a new StatePosterior, and the stored
    probability array is marked read-only.

    Parameters
    ----------
    blim : BLIM
        The BLIM model defining states and likelihoods.
    probabilities : np.ndarray
        Probability for each state, in the order of ``blim.states``.
        Any 1D array-like of numbers is accepted and copied into a
        float array. Must sum to 1.

    Raises
    ------
    ValueError
        If ``probabilities`` is not one-dimensional, has the wrong
        length, contains values outside [0, 1], or does not sum to 1.
    """

    __slots__ = ("_blim", "_probs")

    def __init__(self, blim: BLIM, probabilities: np.ndarray) -> None:
        # np.array always copies, so the caller's data is never aliased,
        # and plain sequences (lists, tuples) are accepted as well.
        probs = np.array(probabilities, dtype=float)

        # A 2D input such as shape (n, 1) would pass the length check
        # below and then broadcast incorrectly in update(); reject it.
        if probs.ndim != 1:
            raise ValueError(f"probabilities must be a 1D array, got shape {probs.shape}")
        if len(probs) != blim.n_states:
            raise ValueError(f"Expected {blim.n_states} probabilities, got {len(probs)}")
        if np.any(probs < 0) or np.any(probs > 1):
            raise ValueError("All probabilities must be in [0, 1].")
        total = probs.sum()
        if not np.isclose(total, 1.0):
            raise ValueError(f"Probabilities must sum to 1, got {total}")

        self._blim = blim
        self._probs = probs
        self._probs.flags.writeable = False

    @classmethod
    def uniform(cls, blim: BLIM) -> StatePosterior:
        """Create a uniform prior (maximum uncertainty)."""
        n = blim.n_states
        return cls(blim, np.full(n, 1.0 / n))

    @classmethod
    def from_prior(cls, blim: BLIM, prior: dict[frozenset[str], float]) -> StatePosterior:
        """Create from an explicit prior mapping states to probabilities.

        Parameters
        ----------
        blim : BLIM
            The BLIM model.
        prior : dict[frozenset[str], float]
            Mapping from each state to its prior probability.
            Must cover all states and sum to 1.

        Raises
        ------
        ValueError
            If a state of the model has no entry in ``prior``, or if the
            resulting probabilities are not a valid distribution.
        """
        states = blim.states
        missing = set(states) - set(prior)
        if missing:
            raise ValueError(f"Prior is missing states: {[set(s) for s in missing]}")
        probs = np.array([prior[s] for s in states])
        return cls(blim, probs)

    @property
    def blim(self) -> BLIM:
        """The BLIM model this distribution refers to."""
        return self._blim

    @property
    def probabilities(self) -> np.ndarray:
        """The read-only probability array, one entry per state."""
        return self._probs

    @property
    def states(self) -> list[frozenset[str]]:
        """The model's states, in the order matching ``probabilities``."""
        return self._blim.states

    # ------------------------------------------------------------------
    # Bayesian update
    # ------------------------------------------------------------------

    def update(self, item: str, response: bool) -> StatePosterior:
        """Return a new posterior after observing a response.

        Applies Bayes' theorem:
            P(state | response) ∝ P(response | state) × P(state)

        Parameters
        ----------
        item : str
            The item that was assessed. Must be in the structure's domain.
        response : bool
            True for correct, False for incorrect.

        Raises
        ------
        ValueError
            If item is not in the domain, or if the observation has
            zero probability under all states with nonzero prior
            (impossible evidence).
        """
        likelihoods = self._blim.likelihood_vector(item, response)
        unnormalized = likelihoods * self._probs
        normalizer = unnormalized.sum()

        # A zero normalizer means no state with prior mass can explain
        # the response; dividing would produce NaNs.
        if normalizer == 0:
            raise ValueError(
                f"Impossible evidence: P(item='{item}', "
                f"response={response}) = 0 under current posterior. "
                f"This indicates a model/data inconsistency."
            )

        posterior = unnormalized / normalizer
        return StatePosterior(self._blim, posterior)

    # ------------------------------------------------------------------
    # Queries on the distribution
    # ------------------------------------------------------------------

    @property
    def entropy(self) -> float:
        """Shannon entropy (bits) of the current distribution."""
        # Zero-probability states contribute nothing and would make
        # log2 produce -inf, so they are dropped first.
        p = self._probs[self._probs > 0]
        # Adding 0.0 turns the -0.0 produced by negating an exactly-zero
        # sum (a degenerate distribution) into a plain 0.0.
        return float(-np.sum(p * np.log2(p))) + 0.0

    @property
    def most_likely_state(self) -> tuple[frozenset[str], float]:
        """Return (state, probability) of the most probable state."""
        idx = int(np.argmax(self._probs))
        return self._blim.states[idx], float(self._probs[idx])

    def marginal_mastery(self) -> dict[str, float]:
        """Marginal probability of mastering each item.

        For each item q: P(q mastered) = sum of P(state) for all states
        containing q.

        Returns
        -------
        dict[str, float]
            One entry per item of the structure's domain.
        """
        # blim.states builds a new list on every access; fetch it once
        # instead of once per item.
        states = self._blim.states
        result: dict[str, float] = {}
        for item in self._blim.structure.domain:
            prob = sum(self._probs[i] for i, state in enumerate(states) if item in state)
            result[item] = float(prob)
        return result
def shannon_entropy(probs: np.ndarray) -> float:
    """Shannon entropy in bits, handling zero probabilities.

    Parameters
    ----------
    probs : np.ndarray
        Probabilities. Any array-like of numbers is accepted. Entries
        that are not strictly positive are ignored.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the positive entries. An input with
        no uncertainty (or no positive entries) yields ``0.0``.
    """
    values = np.asarray(probs, dtype=float)
    # Drop non-positive entries: they contribute nothing to the sum and
    # log2 would otherwise yield -inf or NaN for them.
    positive = values[values > 0]
    # Negating an exactly-zero sum gives -0.0; adding 0.0 normalizes it
    # to +0.0 without affecting any other value.
    return float(-np.sum(positive * np.log2(positive))) + 0.0