"""
Basic Local Independence Model (BLIM) and Bayesian state inference.
The BLIM defines the probability of a response pattern given a
knowledge state, using two parameters:
- beta (slip): P(incorrect | item mastered)
- eta (guess): P(correct | item not mastered)
The model assumes local independence: responses to different items
are conditionally independent given the knowledge state.
StatePosterior maintains the Bayesian probability distribution
over knowledge states and supports sequential updating.
References:
Doignon, J.-P., & Falmagne, J.-C. (1999).
Knowledge Spaces, Chapter 12. Springer-Verlag.
Falmagne, J.-C., & Doignon, J.-P. (2011).
Learning Spaces, Chapter 12. Springer-Verlag.
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
from knowledgespaces.structures.knowledge_structure import KnowledgeStructure
@dataclass(frozen=True)
class BLIMParams:
    """Parameters for the Basic Local Independence Model.

    Parameters
    ----------
    beta : float or dict[str, float]
        Slip probability: P(incorrect response | item mastered).
        A scalar applies the same value to every item; a dict maps
        each item to its own slip probability. Values must be in [0, 1).
    eta : float or dict[str, float]
        Guess probability: P(correct response | item not mastered).
        A scalar applies the same value to every item; a dict maps
        each item to its own guess probability. Values must be in [0, 1).

    Notes
    -----
    The constraint beta + eta < 1 (per item, when dicts are used)
    ensures model identifiability: a mastered item must be more likely
    to get a correct response than an unmastered one.

    Raises
    ------
    ValueError
        If any value is outside [0, 1), or if both parameters are
        scalars and beta + eta >= 1.
    """

    beta: float | dict[str, float]
    eta: float | dict[str, float]

    def __post_init__(self) -> None:
        _validate_param(self.beta, "beta")
        _validate_param(self.eta, "eta")
        # Joint constraint: checked per-item in BLIM.__init__ when
        # the domain is known; for two scalars we can check immediately.
        if (
            isinstance(self.beta, (int, float))
            and isinstance(self.eta, (int, float))
            and self.beta + self.eta >= 1
        ):
            raise ValueError(
                f"beta + eta must be < 1 for identifiability, "
                f"got {self.beta} + {self.eta} = {self.beta + self.eta}"
            )
def _validate_param(value: float | dict[str, float], name: str) -> None:
"""Validate a scalar or per-item BLIM parameter."""
if isinstance(value, dict):
for item, v in value.items():
if not (0 <= v < 1):
raise ValueError(f"{name}['{item}'] must be in [0, 1), got {v}")
else:
if not (0 <= value < 1):
raise ValueError(f"{name} must be in [0, 1), got {value}")
def _resolve_param(
value: float | dict[str, float], domain: frozenset[str], name: str
) -> dict[str, float]:
"""Expand a scalar or dict param into a per-item dict."""
if isinstance(value, dict):
missing = domain - set(value)
if missing:
raise ValueError(f"{name} dict is missing items: {missing}")
return {item: value[item] for item in domain}
return {item: float(value) for item in domain}
class BLIM:
    """Basic Local Independence Model on a knowledge structure.

    For each item q the model has a slip probability beta_q and a guess
    probability eta_q; responses to different items are conditionally
    independent given the knowledge state.

    Parameters
    ----------
    structure : KnowledgeStructure
        The knowledge structure defining valid states.
    params : BLIMParams
        Slip and guess parameters (scalar or per-item dict).

    Raises
    ------
    ValueError
        If a per-item dict is missing items from the domain, or if
        beta + eta >= 1 for any item (non-identifiable model).
    """

    __slots__ = ("_beta", "_eta", "_params", "_states_list", "_structure")

    def __init__(self, structure: KnowledgeStructure, params: BLIMParams) -> None:
        self._structure = structure
        self._params = params
        # Canonical state order (by size, then lexicographically) so that
        # likelihood vectors and posteriors are reproducible across runs.
        self._states_list: list[frozenset[str]] = sorted(
            structure.states,
            key=lambda s: (len(s), sorted(s)),
        )
        # Resolve per-item beta/eta dicts from scalars or explicit dicts.
        self._beta: dict[str, float] = _resolve_param(params.beta, structure.domain, "beta")
        self._eta: dict[str, float] = _resolve_param(params.eta, structure.domain, "eta")
        # Validate the joint identifiability constraint per item; the
        # scalar/scalar case was already checked in BLIMParams.__post_init__.
        for item in structure.domain:
            b, e = self._beta[item], self._eta[item]
            if b + e >= 1:
                raise ValueError(
                    f"beta + eta must be < 1 for identifiability on item "
                    f"'{item}', got {b} + {e} = {b + e}"
                )

    @property
    def structure(self) -> KnowledgeStructure:
        """The underlying knowledge structure."""
        return self._structure

    @property
    def params(self) -> BLIMParams:
        """The parameters this model was constructed with."""
        return self._params

    @property
    def n_states(self) -> int:
        """Number of knowledge states in the structure."""
        return len(self._states_list)

    @property
    def states(self) -> list[frozenset[str]]:
        """The states in canonical order (returns a defensive copy)."""
        return list(self._states_list)

    def _validate_item(self, item: str) -> None:
        """Raise ValueError if *item* is not in the structure's domain."""
        if item not in self._structure.domain:
            raise ValueError(f"Item '{item}' is not in the structure's domain.")

    def likelihood(
        self,
        item: str,
        response: bool,
        state: frozenset[str],
    ) -> float:
        """Compute P(response | state) for a single item and state.

        Parameters
        ----------
        item : str
            The item being assessed.
        response : bool
            True for correct, False for incorrect.
        state : frozenset[str]
            The knowledge state. Must be a state in the structure.

        Raises
        ------
        ValueError
            If item is not in the domain or state is not in the structure.
        """
        self._validate_item(item)
        if state not in self._structure.states:
            raise ValueError(f"State {set(state)} is not in the structure.")
        mastered = item in state
        beta, eta = self._beta[item], self._eta[item]
        if response:  # correct
            return (1 - beta) if mastered else eta
        return beta if mastered else (1 - eta)  # incorrect

    def likelihood_vector(self, item: str, response: bool) -> np.ndarray:
        """Compute P(response | state) for all states.

        Returns a 1D array of length n_states, where element i is
        P(response | states[i]).

        Raises
        ------
        ValueError
            If item is not in the domain.
        """
        self._validate_item(item)
        # Skip per-call state validation: _states_list is guaranteed valid.
        beta, eta = self._beta[item], self._eta[item]
        # Boolean mastery mask over the canonical state order, then a
        # single vectorized select instead of a Python-level loop.
        mastered = np.fromiter(
            (item in state for state in self._states_list),
            dtype=bool,
            count=len(self._states_list),
        )
        if response:
            return np.where(mastered, 1.0 - beta, eta)
        return np.where(mastered, beta, 1.0 - eta)
class StatePosterior:
    """Bayesian probability distribution over knowledge states.

    Immutable: update() returns a new StatePosterior.

    Parameters
    ----------
    blim : BLIM
        The BLIM model defining states and likelihoods.
    probabilities : np.ndarray
        Probability for each state. Must sum to 1.

    Raises
    ------
    ValueError
        If the vector has the wrong length, contains values outside
        [0, 1], or does not sum to 1 (within floating tolerance).
    """

    __slots__ = ("_blim", "_probs")

    def __init__(self, blim: BLIM, probabilities: np.ndarray) -> None:
        if len(probabilities) != blim.n_states:
            raise ValueError(f"Expected {blim.n_states} probabilities, got {len(probabilities)}")
        if np.any(probabilities < 0) or np.any(probabilities > 1):
            raise ValueError("All probabilities must be in [0, 1].")
        total = probabilities.sum()
        if not np.isclose(total, 1.0):
            raise ValueError(f"Probabilities must sum to 1, got {total}")
        self._blim = blim
        # Defensive copy, frozen so callers cannot mutate the posterior.
        self._probs = probabilities.copy()
        self._probs.flags.writeable = False

    @classmethod
    def uniform(cls, blim: BLIM) -> StatePosterior:
        """Create a uniform prior (maximum uncertainty)."""
        n = blim.n_states
        return cls(blim, np.full(n, 1.0 / n))

    @classmethod
    def from_prior(cls, blim: BLIM, prior: dict[frozenset[str], float]) -> StatePosterior:
        """Create from an explicit prior mapping states to probabilities.

        Parameters
        ----------
        blim : BLIM
            The BLIM model.
        prior : dict[frozenset[str], float]
            Mapping from each state to its prior probability.
            Must cover all states and sum to 1.

        Raises
        ------
        ValueError
            If any state in the structure is missing from the prior.
        """
        states = blim.states
        missing = set(states) - set(prior)
        if missing:
            raise ValueError(f"Prior is missing states: {[set(s) for s in missing]}")
        probs = np.array([prior[s] for s in states])
        return cls(blim, probs)

    @property
    def blim(self) -> BLIM:
        """The underlying BLIM model."""
        return self._blim

    @property
    def probabilities(self) -> np.ndarray:
        """Per-state probabilities (read-only array, canonical order)."""
        return self._probs

    @property
    def states(self) -> list[frozenset[str]]:
        """States in the same canonical order as `probabilities`."""
        return self._blim.states

    # ------------------------------------------------------------------
    # Bayesian update
    # ------------------------------------------------------------------
    def update(self, item: str, response: bool) -> StatePosterior:
        """Return a new posterior after observing a response.

        Applies Bayes' theorem:
            P(state | response) ∝ P(response | state) × P(state)

        Parameters
        ----------
        item : str
            The item that was assessed. Must be in the structure's domain.
        response : bool
            True for correct, False for incorrect.

        Raises
        ------
        ValueError
            If item is not in the domain, or if the observation has
            zero probability under all states with nonzero prior
            (impossible evidence).
        """
        likelihoods = self._blim.likelihood_vector(item, response)
        unnormalized = likelihoods * self._probs
        normalizer = unnormalized.sum()
        if normalizer == 0:
            raise ValueError(
                f"Impossible evidence: P(item='{item}', "
                f"response={response}) = 0 under current posterior. "
                f"This indicates a model/data inconsistency."
            )
        posterior = unnormalized / normalizer
        return StatePosterior(self._blim, posterior)

    # ------------------------------------------------------------------
    # Queries on the distribution
    # ------------------------------------------------------------------
    @property
    def entropy(self) -> float:
        """Shannon entropy (bits) of the current distribution."""
        # Filter zeros first: 0 * log2(0) is defined as 0 here.
        p = self._probs[self._probs > 0]
        return float(-np.sum(p * np.log2(p)))

    @property
    def most_likely_state(self) -> tuple[frozenset[str], float]:
        """Return (state, probability) of the most probable state."""
        idx = int(np.argmax(self._probs))
        return self._blim.states[idx], float(self._probs[idx])

    def marginal_mastery(self) -> dict[str, float]:
        """Marginal probability of mastering each item.

        For each item q: P(q mastered) = sum of P(state) for all
        states containing q.
        """
        # Single pass over the states instead of one full scan (and one
        # list copy via blim.states) per item.
        result: dict[str, float] = {item: 0.0 for item in self._blim.structure.domain}
        for prob, state in zip(self._probs, self._blim.states):
            for item in state:
                result[item] += float(prob)
        return result
def shannon_entropy(probs: np.ndarray) -> float:
    """Shannon entropy in bits, handling zero probabilities.

    Zero-probability entries are filtered out before taking the log,
    following the convention 0 * log2(0) = 0.

    Parameters
    ----------
    probs : np.ndarray
        Probability vector (entries assumed nonnegative).
    """
    p = probs[probs > 0]
    return float(-np.sum(p * np.log2(p)))