# Source code for knowledgespaces.metrics.agreement
"""
Agreement measures between knowledge structures or relations.
Provides Cohen's kappa and related indices for comparing
prerequisite matrices or state memberships.
"""
from __future__ import annotations

from itertools import permutations

from knowledgespaces.structures.relations import SurmiseRelation
# [docs]
def cohens_kappa(rel1: SurmiseRelation, rel2: SurmiseRelation) -> float:
    """Cohen's kappa for agreement on prerequisite relations.

    Compares two surmise relations on the same domain. For every
    ordered pair (a, b) with a ≠ b, counts agreement/disagreement
    on whether (a, b) is a prerequisite.

    Parameters
    ----------
    rel1, rel2 : SurmiseRelation
        Must have the same item domain.

    Returns
    -------
    float
        Cohen's kappa in [-1, 1]. 1 = perfect agreement,
        0 = chance agreement, <0 = worse than chance.

    Raises
    ------
    ValueError
        If the relations have different domains.
    """
    if rel1.items != rel2.items:
        raise ValueError("Relations must have the same domain for kappa.")
    # Normalize to transitive closure so that equivalent partial orders
    # (e.g. a→b→c vs a→b→c + a→c) produce the same result.
    rel1 = rel1.transitive_closure()
    rel2 = rel2.transitive_closure()
    items = sorted(rel1.items)
    n = len(items)
    if n < 2:
        return 1.0  # fewer than two items: no ordered pairs to disagree on
    # Tally the 2x2 agreement table over all ordered pairs (a, b), a ≠ b.
    both_yes = both_no = r1_only = r2_only = 0
    for a, b in permutations(items, 2):
        in1 = (a, b) in rel1.relations
        in2 = (a, b) in rel2.relations
        if in1 and in2:
            both_yes += 1
        elif in1:
            r1_only += 1
        elif in2:
            r2_only += 1
        else:
            both_no += 1
    # n >= 2 guarantees total >= 2, so no division-by-zero guard is needed.
    total = n * (n - 1)
    p_observed = (both_yes + both_no) / total
    p1_yes = (both_yes + r1_only) / total
    p2_yes = (both_yes + r2_only) / total
    p_expected = p1_yes * p2_yes + (1 - p1_yes) * (1 - p2_yes)
    if p_expected == 1.0:
        # Degenerate case: both relations are all-yes or both all-no, so
        # chance agreement is certain and kappa's denominator vanishes.
        # By convention we report perfect agreement here.
        return 1.0
    return (p_observed - p_expected) / (1 - p_expected)