# Source code for knowledgespaces.assessment.adaptive

"""
Adaptive assessment: item selection policies and termination criteria.

Provides the Expected Information Gain (EIG) policy for selecting
the most informative item at each step of an adaptive assessment.

References:
    Cover, T. M., & Thomas, J. A. (2006).
    Elements of Information Theory, 2nd ed. Wiley.
"""

from __future__ import annotations

from dataclasses import dataclass

import numpy as np

from knowledgespaces.assessment.blim import StatePosterior, shannon_entropy


@dataclass(frozen=True)
class ItemScore:
    """Score of an item under a selection policy.

    Immutable value object pairing an item with the score a selection
    policy (e.g. EIG) assigned to it.
    """

    # Item identifier within the knowledge domain.
    item: str
    # Score assigned by the selection policy; higher is better.
    score: float
def select_item_eig(
    posterior: StatePosterior,
    candidates: list[str] | None = None,
    exclude: set[str] | None = None,
) -> ItemScore:
    """Select the item maximizing Expected Information Gain.

    EIG(q) = H(current) - E[H(posterior after observing q)]

    The expectation runs over the two possible responses
    (correct/incorrect), each weighted by its marginal probability
    under the current state distribution.

    Parameters
    ----------
    posterior : StatePosterior
        Current state distribution.
    candidates : list[str] or None
        Items to consider. If None, uses all items in the domain.
    exclude : set[str] or None
        Items to exclude (e.g. already asked). Applied after candidates.

    Returns
    -------
    ItemScore
        The best item and its EIG score.

    Raises
    ------
    ValueError
        If no candidates are available, or if candidates contains
        items not in the domain.
    """
    blim = posterior.blim
    if candidates is None:
        candidates = sorted(blim.structure.domain)
    if exclude:
        candidates = [q for q in candidates if q not in exclude]
    if not candidates:
        raise ValueError("No candidate items available.")
    extra = set(candidates) - blim.structure.domain
    if extra:
        raise ValueError(f"Candidates contain items not in the domain: {extra}")

    h_now = posterior.entropy
    probs = posterior.probabilities

    def _conditional_entropy(likelihood: np.ndarray) -> float:
        # Entropy of the state posterior updated with one response.
        # A zero-mass (impossible) response is left unnormalized; its
        # entropy term is then weighted by zero probability anyway.
        joint = likelihood * probs
        mass = joint.sum()
        if mass > 0:
            joint = joint / mass
        return shannon_entropy(joint)

    best_item, best_eig = candidates[0], -np.inf
    for item in candidates:
        lh_yes = blim.likelihood_vector(item, True)
        # Marginal probability of a correct response to this item.
        p_yes = float(np.sum(lh_yes * probs))
        h_yes = _conditional_entropy(lh_yes)
        h_no = _conditional_entropy(blim.likelihood_vector(item, False))
        # Expected remaining entropy after observing the response.
        expected_h = p_yes * h_yes + (1 - p_yes) * h_no
        gain = h_now - expected_h
        # Strict '>' keeps the earliest item on ties, as before.
        if gain > best_eig:
            best_item, best_eig = item, gain
    return ItemScore(item=best_item, score=float(best_eig))
def is_converged(
    posterior: StatePosterior,
    threshold: float = 0.85,
) -> bool:
    """Check if the assessment has converged.

    Convergence occurs when the most likely state has probability
    above the threshold.

    Parameters
    ----------
    posterior : StatePosterior
        Current state distribution.
    threshold : float
        Probability threshold for convergence. Must be in (0, 1].
        Default 0.85.

    Raises
    ------
    ValueError
        If threshold is not in (0, 1].
    """
    # De Morgan of `not (0 < threshold <= 1)` — reject out-of-range values.
    if threshold <= 0 or threshold > 1:
        raise ValueError(f"threshold must be in (0, 1], got {threshold}")
    _, top_prob = posterior.most_likely_state
    return top_prob >= threshold