# Source code for knowledgespaces.assessment.instances
"""
Instance pool: multiple questions per item.
In KST assessment, each item (competency) can be tested through
multiple instances (concrete questions). The adaptive engine selects
the best instance, maps it to its parent item for the BLIM update,
and excludes only that specific instance from future selection.
This module provides the InstancePool data structure and an
instance-aware version of select_item_eig.
"""
from __future__ import annotations
import random
from dataclasses import dataclass
import numpy as np
from knowledgespaces.assessment.blim import StatePosterior, shannon_entropy
[docs]
@dataclass(frozen=True)
class Instance:
    """One concrete question, tied to the item (competency) it probes.

    The dataclass is frozen, so instances are immutable, hashable, and
    compared by value.

    Parameters
    ----------
    id : str
        Identifier of this question; expected to be unique within a pool.
    item : str
        Name of the item (competency) that this question tests.
    """

    id: str
    item: str
[docs]
class InstancePool:
    """A collection of instances grouped by the item they test.

    Parameters
    ----------
    instances : list[Instance]
        All available instances. The input is traversed exactly once,
        so a one-shot iterable (e.g. a generator) is handled as well.

    Raises
    ------
    ValueError
        If instance IDs are not unique.

    Notes
    -----
    The constructor has no knowledge of a domain. Call
    :meth:`validate_domain` to check the pool against a structure's
    domain.
    """

    def __init__(self, instances: list[Instance]) -> None:
        # Build both indexes in a single pass so that the input is consumed
        # only once and duplicates are detected before any state is stored.
        by_id: dict[str, Instance] = {}
        by_item: dict[str, list[str]] = {}
        for inst in instances:
            if inst.id in by_id:
                raise ValueError(f"Duplicate instance ID: '{inst.id}'.")
            by_id[inst.id] = inst
            by_item.setdefault(inst.item, []).append(inst.id)

        self._instances: dict[str, Instance] = by_id
        self._by_item: dict[str, list[str]] = by_item

    @property
    def items(self) -> set[str]:
        """All unique items covered by this pool."""
        return set(self._by_item)

    @property
    def instance_ids(self) -> set[str]:
        """IDs of all instances in this pool."""
        return set(self._instances)

    def item_of(self, instance_id: str) -> str:
        """Get the parent item of an instance.

        Raises
        ------
        KeyError
            If ``instance_id`` is not in the pool.
        """
        return self._instances[instance_id].item

    def instances_for(self, item: str) -> list[str]:
        """Get all instance IDs for a given item.

        Returns a fresh list (empty when the item is unknown), so callers
        may mutate the result without affecting the pool.
        """
        return list(self._by_item.get(item, ()))

    def n_instances(self) -> int:
        """Number of instances in the pool (same value as ``len(pool)``)."""
        return len(self._instances)

    def validate_domain(self, domain: frozenset[str]) -> None:
        """Verify that pool items match the structure's domain.

        Raises
        ------
        ValueError
            If pool contains items not in domain, or domain has items
            with no instances. The offending items are listed in sorted
            order so the message is deterministic.
        """
        pool_items = self.items
        domain_items = set(domain)

        extra = pool_items - domain_items
        if extra:
            raise ValueError(
                f"InstancePool contains items not in the domain: {sorted(extra)}"
            )

        missing = domain_items - pool_items
        if missing:
            raise ValueError(
                f"Domain items have no instances in the pool: {sorted(missing)}"
            )

    @classmethod
    def from_dict(cls, mapping: dict[str, list[str]]) -> InstancePool:
        """Create from {item: [instance_id, ...]}.

        Example::

            pool = InstancePool.from_dict({
                "addition": ["add_q1", "add_q2", "add_q3"],
                "subtraction": ["sub_q1", "sub_q2"],
            })
        """
        return cls(
            [
                Instance(id=inst_id, item=item)
                for item, ids in mapping.items()
                for inst_id in ids
            ]
        )

    def __len__(self) -> int:
        """Number of instances in the pool."""
        return len(self._instances)
[docs]
@dataclass(frozen=True)
class InstanceScore:
    """An instance paired with the score it received under the EIG policy.

    Parameters
    ----------
    instance_id : str
        ID of the scored instance.
    item : str
        Item that the instance belongs to.
    score : float
        Score assigned to the instance.
    """

    instance_id: str
    item: str
    score: float
[docs]
def _item_eig(posterior: StatePosterior, item: str, current_entropy: float) -> float:
    """Expected drop in posterior entropy from observing a response to ``item``."""
    blim = posterior.blim
    prior = posterior.probabilities

    # Unnormalised posteriors for each outcome; their totals are the
    # marginal probabilities of observing that outcome.
    joint_correct = blim.likelihood_vector(item, True) * prior
    joint_incorrect = blim.likelihood_vector(item, False) * prior
    mass_correct = joint_correct.sum()
    mass_incorrect = joint_incorrect.sum()
    prob_correct = float(mass_correct)

    # Normalise only when the outcome has non-zero mass, to avoid dividing
    # by zero; a zero-mass branch is left unnormalised.
    post_correct = joint_correct / mass_correct if mass_correct > 0 else joint_correct
    post_incorrect = (
        joint_incorrect / mass_incorrect if mass_incorrect > 0 else joint_incorrect
    )

    expected_entropy = prob_correct * shannon_entropy(post_correct) + (
        1 - prob_correct
    ) * shannon_entropy(post_incorrect)
    return current_entropy - expected_entropy


def select_instance_eig(
    posterior: StatePosterior,
    pool: InstancePool,
    asked: set[str] | None = None,
) -> InstanceScore:
    """Select the instance maximizing Expected Information Gain.

    This is the instance-aware version of select_item_eig. EIG is computed
    per item, restricted to items that still have at least one un-asked
    instance. The item with the highest EIG is picked (ties go to the item
    that sorts first), and one of its un-asked instances is chosen at
    random, since instances of the same item are equivalent from the BLIM
    perspective.

    Exactly one draw is taken from the global ``random`` generator per
    call.

    Parameters
    ----------
    posterior : StatePosterior
        Current state distribution.
    pool : InstancePool
        Available instances.
    asked : set[str] or None
        Instance IDs already asked (excluded from selection).

    Returns
    -------
    InstanceScore
        The best instance, its parent item, and EIG score.

    Raises
    ------
    ValueError
        If no un-asked instances remain, or if pool items don't match
        the structure's domain.
    """
    asked = asked or set()
    domain = posterior.blim.structure.domain
    pool.validate_domain(domain)

    # Keep only the items that can still be asked; sorted order makes the
    # tie-break between equally informative items deterministic.
    open_by_item: dict[str, list[str]] = {}
    for item in sorted(domain):
        remaining = [iid for iid in pool.instances_for(item) if iid not in asked]
        if remaining:
            open_by_item[item] = remaining
    if not open_by_item:
        raise ValueError("No un-asked instances available.")

    current_entropy = shannon_entropy(posterior.probabilities)
    item_eig = {
        item: _item_eig(posterior, item, current_entropy) for item in open_by_item
    }

    # max() returns the first maximal key in insertion (sorted) order.
    best_item = max(item_eig, key=item_eig.__getitem__)
    return InstanceScore(
        instance_id=random.choice(open_by_item[best_item]),
        item=best_item,
        score=item_eig[best_item],
    )