Source code for knowledgespaces.query.block2

"""
Block 2+ of the QUERY algorithm: learning space refinement.

Refines an ordinal space (from Block 1) into a learning space by
querying group prerequisites. Block N tests antecedent sets of size N.

Block 2 handles antecedent size 2, Block 3 size 3, etc. The algorithm
is identical at every level --- only the antecedent cardinality changes.

Three inference mechanisms reduce expert queries:

- Negative monotonicity: if q is globally minimal (Qmax), then (A,q)=NO.
- Positive monotonicity: if any subset of A already implies q, then (A,q)=YES.
- Structural test (Theorem 43): if answering YES would create hanging
  states, then the answer must be NO.

References:
    Falmagne, J.-C., & Doignon, J.-P. (2011).
    Learning Spaces, Chapter 15. Springer-Verlag.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from itertools import combinations

from knowledgespaces._limits import DEFAULT_MAX_N_ITEMS, check_domain_size
from knowledgespaces.query.expert import Expert
from knowledgespaces.query.types import InferenceSource, Query, QueryAnswer
from knowledgespaces.structures.knowledge_structure import KnowledgeStructure



[docs]
@dataclass
class BlockNStats:
    """Statistics from a Block N execution."""

    expert_queries: int = 0
    inferred_negative_monotonicity: int = 0
    inferred_positive_monotonicity: int = 0
    inferred_structural: int = 0

    @property
    def total_inferences(self) -> int:
        return (
            self.inferred_negative_monotonicity
            + self.inferred_positive_monotonicity
            + self.inferred_structural
        )

    @property
    def total_candidates(self) -> int:
        return self.expert_queries + self.total_inferences




[docs]
@dataclass
class BlockNResult:
    """Result of a Block N query phase."""

    structure: KnowledgeStructure
    positive: set[tuple[frozenset[str], str]]
    negative: set[tuple[frozenset[str], str]]
    stats: BlockNStats
    log: list[QueryAnswer] = field(default_factory=list)



def _is_learning_space(states: set[frozenset[str]], domain: frozenset[str]) -> bool:
    """Check if a set of states satisfies Theorem 41 (no hanging states).

    This verifies the necessary condition for a learning space used by
    the QUERY algorithm's structural test (Theorem 43): the structure
    contains ∅ and Q, and every non-empty state has at least one
    single-item removal that yields another state.

    For knowledge spaces (closed under union), this local condition is
    equivalent to the full learning space definition. During Block N
    execution, L_current always starts as a learning space and is only
    modified by removing states, so this check is sufficient.
    """
    if frozenset() not in states or domain not in states:
        return False
    for state in states:
        if not state:
            continue
        # A state is hanging if no single-item removal yields another state
        if not any(state - {q} in states for q in state):
            return False
    return True



[docs]
def run_block_n(
    items: list[str],
    structure: KnowledgeStructure,
    prior_positive: set[tuple[frozenset[str], str]],
    expert: Expert,
    *,
    antecedent_size: int = 2,
    minimal_global: frozenset[str] = frozenset(),
    max_items: int | None = None,
) -> BlockNResult:
    """Execute Block N of the QUERY algorithm.

    Parameters
    ----------
    items : list[str]
        The domain of items.
    structure : KnowledgeStructure
        The current knowledge structure to refine.
    prior_positive : set[tuple[frozenset[str], str]]
        Positive relations from all previous blocks. Each entry is
        (antecedent_set, consequent). Single-item antecedents come
        from Block 1's closure: {(frozenset({a}), b) for (a,b) in closure}.
    expert : Expert
        The expert to query.
    antecedent_size : int
        Size of antecedent sets to test. Default 2 (standard Block 2).
    minimal_global : frozenset[str]
        Items certified globally minimal by Qmax in Block 1.
    max_items : int | None
        Hard limit override for the ``|Q|`` preflight check. Block N scales
        as ``O(|Q| * C(|Q|-1, k))``; values above ~25 are not practical.

    Returns
    -------
    BlockNResult
        Refined structure, discovered relations, and stats.
    """
    check_domain_size(
        len(items),
        max_threshold=max_items if max_items is not None else DEFAULT_MAX_N_ITEMS,
        context="run_block_n",
    )

    domain = frozenset(items)
    L_current: set[frozenset[str]] = set(structure.states)
    P_current: set[tuple[frozenset[str], str]] = set(prior_positive)
    N_current: set[tuple[frozenset[str], str]] = set()
    stats = BlockNStats()
    log: list[QueryAnswer] = []

    # Generate candidate queries: all (A, q) with |A| = antecedent_size
    candidates: list[tuple[frozenset[str], str]] = []
    for q in items:
        others = [x for x in items if x != q]
        for combo in combinations(others, antecedent_size):
            candidates.append((frozenset(combo), q))

    for A, q in candidates:
        # --- Inference 1: Negative monotonicity (Qmax) ---
        if q in minimal_global and q not in A:
            N_current.add((A, q))
            stats.inferred_negative_monotonicity += 1
            log.append(
                QueryAnswer(
                    Query.group(A, q),
                    False,
                    InferenceSource.MONOTONICITY_NEGATIVE,
                )
            )
            continue

        # --- Inference 2: Positive monotonicity ---
        # If any proper subset of A (from prior blocks) implies q, then A implies q.
        inferred_positive = False
        for size in range(1, len(A)):
            for sub_combo in combinations(A, size):
                sub = frozenset(sub_combo)
                if (sub, q) in P_current:
                    inferred_positive = True
                    break
            if inferred_positive:
                break

        if inferred_positive:
            stats.inferred_positive_monotonicity += 1
            log.append(
                QueryAnswer(
                    Query.group(A, q),
                    True,
                    InferenceSource.MONOTONICITY_POSITIVE,
                )
            )
            continue

        # --- Inference 3: Structural test (Theorem 43) ---
        # Simulate YES: remove states containing q but disjoint from A
        DK = {state for state in L_current if q in state and not (A & state)}
        L_potential = L_current - DK

        if not _is_learning_space(L_potential, domain):
            N_current.add((A, q))
            stats.inferred_structural += 1
            log.append(
                QueryAnswer(
                    Query.group(A, q),
                    False,
                    InferenceSource.STRUCTURAL,
                )
            )
            continue

        # --- Expert query ---
        query = Query.group(A, q)
        answer = expert(query)
        stats.expert_queries += 1
        log.append(QueryAnswer(query, answer, InferenceSource.EXPERT))

        if answer:
            L_current = L_potential
            P_current.add((A, q))
        else:
            N_current.add((A, q))

    result_structure = KnowledgeStructure(items, L_current)

    return BlockNResult(
        structure=result_structure,
        positive=P_current,
        negative=N_current,
        stats=stats,
        log=log,
    )