Source code for knowledgespaces.query.block1

"""
Block 1 of the QUERY algorithm: surmise relation discovery.

Extracts prerequisite relations between items by querying an expert
with pair queries (a -> q) and optional Qmax minimality tests.

**Implementation note — multi-phase optimization.**

The standard QUERY Block 1 (Koppen & Doignon, 1990; Learning Spaces,
Falmagne & Doignon, 2011, Ch. 15) iterates over all item pairs and
uses transitivity + antisymmetry to prune redundant queries.

This implementation splits Block 1 into four phases as an
*optimization* that maximises early inference opportunities:

- Phase 0 (optional): Qmax test — group query Q\\{q} → q for each item,
  identifies globally minimal items that need no pair queries.
- Phase 1: Forward pass — pair queries with transitivity/antisymmetry
  pruning, processing items in input order.
- Phase 2: Bottom-up pass — re-explores from minimal items outward,
  exploiting newly discovered transitivity.
- Phase 3: Final verification sweep — covers any pair not yet resolved
  by Phases 1–2, guaranteeing completeness.

The result is identical to the standard single-pass algorithm: the same
surmise relation is discovered, but typically with fewer expert queries
thanks to the ordering strategy. Completeness is guaranteed by Phase 3.

**Antisymmetry assumption (item-level QUERY).**

Block 1 assumes the target surmise relation is a *partial order*
(antisymmetric): once a prerequisite (q, r) is established, the reverse
(r, q) is inferred to be False *without re-querying the expert*. This is
the standard, valid assumption when QUERY is run on a *discriminative*
item domain, where no two distinct items are equivalent. The inference is
fully traceable, not silent: each inferred pair is logged with
``InferenceSource.ANTISYMMETRY``, counted in
``Block1Stats.skipped_by_antisymmetry``, and listed by
``Block1Result.assumed_antisymmetric_pairs``. Equivalent (mutually
prerequisite) items belong to competence-based KST (skills), not item-level
QUERY: if your domain may contain equivalent items, collapse them first or
model skills via :mod:`knowledgespaces.derivation`.

References:
    Koppen, M., & Doignon, J.-P. (1990).
    How to build a knowledge space by querying an expert.
    Journal of Mathematical Psychology, 34, 311-331.

    Falmagne, J.-C., & Doignon, J.-P. (2011).
    Learning Spaces, Chapter 15. Springer-Verlag.
"""

from __future__ import annotations

from dataclasses import dataclass, field

from knowledgespaces._limits import DEFAULT_MAX_N_ITEMS, check_domain_size
from knowledgespaces.query.expert import Expert
from knowledgespaces.query.types import InferenceSource, Query, QueryAnswer
from knowledgespaces.structures.relations import SurmiseRelation



[docs]
@dataclass
class Block1Stats:
    """Statistics from Block 1 execution."""

    pair_queries: int = 0
    group_queries: int = 0
    deduced_by_transitivity: int = 0
    skipped_by_antisymmetry: int = 0

    @property
    def total_queries(self) -> int:
        return self.pair_queries + self.group_queries

    @property
    def total_inferences(self) -> int:
        return self.deduced_by_transitivity + self.skipped_by_antisymmetry




[docs]
@dataclass
class Block1Result:
    """Result of Block 1 query phase."""

    relation: SurmiseRelation
    closure: SurmiseRelation
    minimal_global: frozenset[str]
    minimal_local: frozenset[str]
    stats: Block1Stats
    log: list[QueryAnswer] = field(default_factory=list)

    @property
    def minimal_all(self) -> frozenset[str]:
        return self.minimal_global | self.minimal_local

    @property
    def assumed_antisymmetric_pairs(self) -> list[tuple[str, str]]:
        """Reverse pairs (r, q) inferred False by the antisymmetry assumption.

        These pairs were *not* put to the expert: each was inferred False
        because (q, r) is a prerequisite and the surmise relation is assumed
        antisymmetric (a partial order). Exposed for full transparency — see
        the module docstring's "Antisymmetry assumption" note.
        """
        return [
            (next(iter(qa.query.antecedent)), qa.query.consequent)
            for qa in self.log
            if qa.source is InferenceSource.ANTISYMMETRY and len(qa.query.antecedent) == 1
        ]




[docs]
def run_block1(
    items: list[str],
    expert: Expert,
    *,
    use_qmax: bool = True,
    max_items: int | None = None,
) -> Block1Result:
    """Execute Block 1 of the QUERY algorithm.

    Parameters
    ----------
    items : list[str]
        The domain of items.
    expert : Expert
        The expert to query.
    use_qmax : bool
        If True, run Phase 0 (Qmax minimality test). Default True.
    max_items : int | None
        Hard limit override for the ``|Q|`` preflight check. Block 1 is
        ``O(|Q|^5)`` worst case; above ~25 items it is not practical.

    Returns
    -------
    Block1Result
        Discovered surmise relation, closure, minimal items, and stats.
    """
    check_domain_size(
        len(items),
        max_threshold=max_items if max_items is not None else DEFAULT_MAX_N_ITEMS,
        context="run_block1",
    )

    relations: set[tuple[str, str]] = set()
    asked_pairs: set[tuple[str, str]] = set()
    resolved_pairs: set[tuple[str, str]] = set()  # all logged pairs (no duplicates)
    stats = Block1Stats()
    log: list[QueryAnswer] = []

    def _closure() -> set[tuple[str, str]]:
        return SurmiseRelation(items, relations).transitive_closure().relations

    def _record(query: Query, answer: bool, source: InferenceSource) -> None:
        log.append(QueryAnswer(query, answer, source))

    def _record_pair(r: str, q: str, answer: bool, source: InferenceSource) -> None:
        if (r, q) not in resolved_pairs:
            resolved_pairs.add((r, q))
            log.append(QueryAnswer(Query.pair(r, q), answer, source))

    # ------------------------------------------------------------------
    # Phase 0: Qmax minimality test
    # ------------------------------------------------------------------
    minimal_global: set[str] = set()

    if use_qmax:
        for q in items:
            A = frozenset(x for x in items if x != q)
            query = Query.group(A, q)
            answer = expert(query)
            stats.group_queries += 1
            _record(query, answer, InferenceSource.EXPERT)
            if not answer:
                minimal_global.add(q)

    # ------------------------------------------------------------------
    # Phase 1: Pair queries with pruning
    # ------------------------------------------------------------------
    minimal_local: set[str] = set()

    for q in items:
        if q in minimal_global:
            continue

        has_prereq = False
        closure = _closure()

        for r in items:
            if r == q:
                continue

            # Transitivity: already deducible
            if (r, q) in closure:
                stats.deduced_by_transitivity += 1
                _record_pair(r, q, True, InferenceSource.TRANSITIVITY)
                has_prereq = True
                continue

            # Antisymmetry: (q, r) in closure forbids (r, q)
            if (q, r) in closure:
                stats.skipped_by_antisymmetry += 1
                _record_pair(r, q, False, InferenceSource.ANTISYMMETRY)
                continue

            if (r, q) in asked_pairs:
                continue

            query = Query.pair(r, q)
            answer = expert(query)
            stats.pair_queries += 1
            resolved_pairs.add((r, q))
            _record(query, answer, InferenceSource.EXPERT)

            if answer:
                relations.add((r, q))
                has_prereq = True
                closure = _closure()

            asked_pairs.add((r, q))

        if not has_prereq:
            minimal_local.add(q)

    # ------------------------------------------------------------------
    # Phase 2: Bottom-up from minimals
    # ------------------------------------------------------------------
    all_minimals = minimal_global | minimal_local
    processed: set[str] = set()
    # Iterate minimals in input order so Phase 2 is deterministic across
    # runs. Phase 2 is an optimization only — the final closure/relation is
    # identical regardless of order — but `total_queries` and the *sequence*
    # of expert queries depend on the iteration order.
    queue: list[str] = [x for x in items if x in all_minimals]

    while queue:
        current = queue.pop(0)
        if current in processed:
            continue

        closure = _closure()
        for q in items:
            if q == current or q in all_minimals or q in processed:
                continue
            if (current, q) in closure:
                _record_pair(current, q, True, InferenceSource.TRANSITIVITY)
                continue
            if (q, current) in closure:
                _record_pair(current, q, False, InferenceSource.ANTISYMMETRY)
                continue
            if (current, q) in asked_pairs:
                continue

            query = Query.pair(current, q)
            answer = expert(query)
            stats.pair_queries += 1
            resolved_pairs.add((current, q))
            _record(query, answer, InferenceSource.EXPERT)

            if answer:
                relations.add((current, q))
                if q not in queue and q not in processed:
                    queue.append(q)

            asked_pairs.add((current, q))

        processed.add(current)

    # ------------------------------------------------------------------
    # Phase 3: Final verification
    # ------------------------------------------------------------------
    closure = _closure()
    for r in items:
        for q in items:
            if r == q or q in all_minimals:
                continue
            if (r, q) in asked_pairs:
                continue
            if (r, q) in closure:
                _record_pair(r, q, True, InferenceSource.TRANSITIVITY)
                continue
            if (q, r) in closure:
                _record_pair(r, q, False, InferenceSource.ANTISYMMETRY)
                continue

            query = Query.pair(r, q)
            answer = expert(query)
            stats.pair_queries += 1
            resolved_pairs.add((r, q))
            _record(query, answer, InferenceSource.EXPERT)

            if answer:
                relations.add((r, q))
                closure = _closure()

            asked_pairs.add((r, q))

    # ------------------------------------------------------------------
    # Build result
    # ------------------------------------------------------------------
    direct = SurmiseRelation(items, relations)
    final_closure = direct.transitive_closure()

    return Block1Result(
        relation=direct,
        closure=final_closure,
        minimal_global=frozenset(minimal_global),
        minimal_local=frozenset(minimal_local),
        stats=stats,
        log=log,
    )