"""
Basic Local Independence Model (BLIM) and Bayesian state inference.
The BLIM defines the probability of a response pattern given a
knowledge state, using two parameters:
- beta (slip): P(incorrect | item mastered)
- eta (guess): P(correct | item not mastered)
The model assumes local independence: responses to different items
are conditionally independent given the knowledge state.
StatePosterior maintains the Bayesian probability distribution
over knowledge states and supports sequential updating.
References:
Doignon, J.-P., & Falmagne, J.-C. (1999).
Knowledge Spaces, Chapter 12. Springer-Verlag.
Falmagne, J.-C., & Doignon, J.-P. (2011).
Learning Spaces, Chapter 12. Springer-Verlag.
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from knowledgespaces.structures.knowledge_structure import KnowledgeStructure
@dataclass(frozen=True)
class BLIMParams:
    """Parameters for the Basic Local Independence Model.

    Parameters
    ----------
    beta : float or dict[str, float]
        Slip probability: P(incorrect response | item mastered).
        A scalar applies the same value to every item; a dict maps
        each item to its own slip probability. Values must be in [0, 1).
    eta : float or dict[str, float]
        Guess probability: P(correct response | item not mastered).
        A scalar applies the same value to every item; a dict maps
        each item to its own guess probability. Values must be in [0, 1).

    Notes
    -----
    The constraint beta + eta < 1 (per item, when dicts are used)
    ensures model identifiability: a mastered item must be more likely
    to get a correct response than an unmastered one.

    Raises
    ------
    ValueError
        If any value is outside [0, 1), or if both parameters are
        scalars and beta + eta >= 1.
    """

    beta: float | dict[str, float]
    eta: float | dict[str, float]

    def __post_init__(self) -> None:
        _validate_param(self.beta, "beta")
        _validate_param(self.eta, "eta")
        # Joint constraint: checked per-item in BLIM.__init__ when
        # the domain is known; for two scalars we can check immediately.
        if (
            isinstance(self.beta, (int, float))
            and isinstance(self.eta, (int, float))
            and self.beta + self.eta >= 1
        ):
            raise ValueError(
                f"beta + eta must be < 1 for identifiability, "
                f"got {self.beta} + {self.eta} = {self.beta + self.eta}"
            )
def _validate_param(value: float | dict[str, float], name: str) -> None:
"""Validate a scalar or per-item BLIM parameter."""
if isinstance(value, dict):
for item, v in value.items():
if not (0 <= v < 1):
raise ValueError(f"{name}['{item}'] must be in [0, 1), got {v}")
else:
if not (0 <= value < 1):
raise ValueError(f"{name} must be in [0, 1), got {value}")
def _resolve_param(
value: float | dict[str, float], domain: frozenset[str], name: str
) -> dict[str, float]:
"""Expand a scalar or dict param into a per-item dict."""
if isinstance(value, dict):
missing = domain - set(value)
if missing:
raise ValueError(f"{name} dict is missing items: {missing}")
return {item: value[item] for item in domain}
return {item: float(value) for item in domain}
class BLIM:
    """Basic Local Independence Model on a knowledge structure.

    For each item q the model has a slip probability beta_q and a guess
    probability eta_q; responses to different items are conditionally
    independent given the knowledge state.

    Parameters
    ----------
    structure : KnowledgeStructure
        The knowledge structure defining valid states.
    params : BLIMParams
        Slip and guess parameters (scalar or per-item dict).

    Raises
    ------
    ValueError
        If a per-item dict is missing items from the domain, or if
        beta + eta >= 1 for any item (non-identifiable model).
    """

    __slots__ = ("_beta", "_eta", "_params", "_states_list", "_structure")

    def __init__(self, structure: KnowledgeStructure, params: BLIMParams) -> None:
        self._structure = structure
        self._params = params
        # Canonical state order (by size, then lexicographically) so that
        # likelihood vectors and posteriors are reproducible across runs.
        self._states_list: list[frozenset[str]] = sorted(
            structure.states,
            key=lambda s: (len(s), sorted(s)),
        )
        # Resolve per-item beta/eta dicts from scalars or explicit dicts.
        self._beta: dict[str, float] = _resolve_param(params.beta, structure.domain, "beta")
        self._eta: dict[str, float] = _resolve_param(params.eta, structure.domain, "eta")
        # Validate the joint identifiability constraint per item; the
        # scalar/scalar case was already checked in BLIMParams.__post_init__.
        for item in structure.domain:
            b, e = self._beta[item], self._eta[item]
            if b + e >= 1:
                raise ValueError(
                    f"beta + eta must be < 1 for identifiability on item "
                    f"'{item}', got {b} + {e} = {b + e}"
                )

    @property
    def structure(self) -> KnowledgeStructure:
        """The underlying knowledge structure."""
        return self._structure

    @property
    def params(self) -> BLIMParams:
        """The parameters this model was constructed with."""
        return self._params

    @property
    def n_states(self) -> int:
        """Number of knowledge states in the structure."""
        return len(self._states_list)

    @property
    def states(self) -> list[frozenset[str]]:
        """The states in canonical order (returns a defensive copy)."""
        return list(self._states_list)

    def _validate_item(self, item: str) -> None:
        """Raise ValueError if *item* is not in the structure's domain."""
        if item not in self._structure.domain:
            raise ValueError(f"Item '{item}' is not in the structure's domain.")

    def likelihood(
        self,
        item: str,
        response: bool,
        state: frozenset[str],
    ) -> float:
        """Compute P(response | state) for a single item and state.

        Parameters
        ----------
        item : str
            The item being assessed.
        response : bool
            True for correct, False for incorrect.
        state : frozenset[str]
            The knowledge state. Must be a state in the structure.

        Raises
        ------
        ValueError
            If item is not in the domain or state is not in the structure.
        """
        self._validate_item(item)
        if state not in self._structure.states:
            raise ValueError(f"State {set(state)} is not in the structure.")
        mastered = item in state
        beta, eta = self._beta[item], self._eta[item]
        if response:  # correct
            return (1 - beta) if mastered else eta
        return beta if mastered else (1 - eta)  # incorrect

    def likelihood_vector(self, item: str, response: bool) -> np.ndarray:
        """Compute P(response | state) for all states.

        Returns a 1D array of length n_states, where element i is
        P(response | states[i]).

        Raises
        ------
        ValueError
            If item is not in the domain.
        """
        self._validate_item(item)
        # Skip per-call state validation: _states_list is guaranteed valid.
        beta, eta = self._beta[item], self._eta[item]
        # Boolean mastery mask over the canonical state order, then a
        # single vectorized select instead of a Python-level loop.
        mastered = np.fromiter(
            (item in state for state in self._states_list),
            dtype=bool,
            count=len(self._states_list),
        )
        if response:
            return np.where(mastered, 1.0 - beta, eta)
        return np.where(mastered, beta, 1.0 - eta)
class StatePosterior:
    """Bayesian probability distribution over knowledge states.

    Immutable: update() returns a new StatePosterior.

    Parameters
    ----------
    blim : BLIM
        The BLIM model defining states and likelihoods.
    probabilities : np.ndarray
        Probability for each state. Must sum to 1.

    Raises
    ------
    ValueError
        If the vector has the wrong length, contains values outside
        [0, 1], or does not sum to 1 (within floating tolerance).
    """

    __slots__ = ("_blim", "_probs")

    def __init__(self, blim: BLIM, probabilities: np.ndarray) -> None:
        if len(probabilities) != blim.n_states:
            raise ValueError(f"Expected {blim.n_states} probabilities, got {len(probabilities)}")
        if np.any(probabilities < 0) or np.any(probabilities > 1):
            raise ValueError("All probabilities must be in [0, 1].")
        total = probabilities.sum()
        if not np.isclose(total, 1.0):
            raise ValueError(f"Probabilities must sum to 1, got {total}")
        self._blim = blim
        # Defensive copy, frozen so callers cannot mutate the posterior.
        self._probs = probabilities.copy()
        self._probs.flags.writeable = False

    @classmethod
    def uniform(cls, blim: BLIM) -> StatePosterior:
        """Create a uniform prior (maximum uncertainty)."""
        n = blim.n_states
        return cls(blim, np.full(n, 1.0 / n))

    @classmethod
    def from_prior(cls, blim: BLIM, prior: dict[frozenset[str], float]) -> StatePosterior:
        """Create from an explicit prior mapping states to probabilities.

        Parameters
        ----------
        blim : BLIM
            The BLIM model.
        prior : dict[frozenset[str], float]
            Mapping from each state to its prior probability.
            Must cover all states and sum to 1.

        Raises
        ------
        ValueError
            If any state in the structure is missing from the prior.
        """
        states = blim.states
        missing = set(states) - set(prior)
        if missing:
            raise ValueError(f"Prior is missing states: {[set(s) for s in missing]}")
        probs = np.array([prior[s] for s in states])
        return cls(blim, probs)

    @property
    def blim(self) -> BLIM:
        """The underlying BLIM model."""
        return self._blim

    @property
    def probabilities(self) -> np.ndarray:
        """Per-state probabilities (read-only array, canonical order)."""
        return self._probs

    @property
    def states(self) -> list[frozenset[str]]:
        """States in the same canonical order as `probabilities`."""
        return self._blim.states

    # ------------------------------------------------------------------
    # Bayesian update
    # ------------------------------------------------------------------
    def update(self, item: str, response: bool) -> StatePosterior:
        """Return a new posterior after observing a response.

        Applies Bayes' theorem:
            P(state | response) ∝ P(response | state) × P(state)

        Parameters
        ----------
        item : str
            The item that was assessed. Must be in the structure's domain.
        response : bool
            True for correct, False for incorrect.

        Raises
        ------
        ValueError
            If item is not in the domain, or if the observation has
            zero probability under all states with nonzero prior
            (impossible evidence).
        """
        likelihoods = self._blim.likelihood_vector(item, response)
        unnormalized = likelihoods * self._probs
        normalizer = unnormalized.sum()
        if normalizer == 0:
            raise ValueError(
                f"Impossible evidence: P(item='{item}', "
                f"response={response}) = 0 under current posterior. "
                f"This indicates a model/data inconsistency."
            )
        posterior = unnormalized / normalizer
        return StatePosterior(self._blim, posterior)

    # ------------------------------------------------------------------
    # Queries on the distribution
    # ------------------------------------------------------------------
    @property
    def entropy(self) -> float:
        """Shannon entropy (bits) of the current distribution."""
        # Filter zeros first: 0 * log2(0) is defined as 0 here.
        p = self._probs[self._probs > 0]
        return float(-np.sum(p * np.log2(p)))

    @property
    def most_likely_state(self) -> tuple[frozenset[str], float]:
        """Return (state, probability) of the most probable state."""
        idx = int(np.argmax(self._probs))
        return self._blim.states[idx], float(self._probs[idx])

    def marginal_mastery(self) -> dict[str, float]:
        """Marginal probability of mastering each item.

        For each item q: P(q mastered) = sum of P(state) for all
        states containing q.
        """
        # Single pass over the states instead of one full scan (and one
        # list copy via blim.states) per item.
        result: dict[str, float] = {item: 0.0 for item in self._blim.structure.domain}
        for prob, state in zip(self._probs, self._blim.states):
            for item in state:
                result[item] += float(prob)
        return result
def shannon_entropy(probs: np.ndarray) -> float:
    """Shannon entropy in bits, handling zero probabilities.

    Zero-probability entries are filtered out before taking the log,
    following the convention 0 * log2(0) = 0.

    Parameters
    ----------
    probs : np.ndarray
        Probability vector (entries assumed nonnegative).
    """
    p = probs[probs > 0]
    return float(-np.sum(p * np.log2(p)))