Source code for knowledgespaces.metrics.distances
"""
Distance measures between knowledge structures.
Provides formal metrics for comparing two knowledge structures:
- Symmetric difference distance
- Hausdorff distance (on states)
- Directional Hamming distances (H→A and A→H)
- Graph Edit Distance on prerequisite relations
All functions require matching domains.
"""
from __future__ import annotations
from dataclasses import dataclass
from knowledgespaces.structures.knowledge_structure import KnowledgeStructure
from knowledgespaces.structures.relations import SurmiseRelation
def _validate_same_domain_structures(
ks1: KnowledgeStructure,
ks2: KnowledgeStructure,
fn_name: str,
) -> None:
if ks1.domain != ks2.domain:
raise ValueError(
f"{fn_name}: structures must have the same domain. "
f"Got {sorted(ks1.domain)} and {sorted(ks2.domain)}."
)
def _validate_same_domain_relations(
rel1: SurmiseRelation,
rel2: SurmiseRelation,
fn_name: str,
) -> None:
if rel1.items != rel2.items:
raise ValueError(
f"{fn_name}: relations must have the same domain. "
f"Got {sorted(rel1.items)} and {sorted(rel2.items)}."
)
[docs]
def symmetric_difference(
ks1: KnowledgeStructure,
ks2: KnowledgeStructure,
) -> int:
"""Number of states in exactly one of the two structures.
Card(K1 △ K2) = Card(K1 \\ K2) + Card(K2 \\ K1)
Raises
------
ValueError
If structures have different domains.
"""
_validate_same_domain_structures(ks1, ks2, "symmetric_difference")
only1 = ks1.states - ks2.states
only2 = ks2.states - ks1.states
return len(only1) + len(only2)
def _hamming(s1: frozenset[str], s2: frozenset[str]) -> int:
"""Hamming distance between two states (size of symmetric difference)."""
return len(s1 ^ s2)
[docs]
def hausdorff(
ks1: KnowledgeStructure,
ks2: KnowledgeStructure,
) -> int:
"""Hausdorff distance between two structures.
max(max over K in K1 of min over L in K2 of d(K,L),
max over L in K2 of min over K in K1 of d(K,L))
where d is the Hamming distance between states.
Returns 0 if the structures are identical.
Raises
------
ValueError
If structures have different domains.
"""
_validate_same_domain_structures(ks1, ks2, "hausdorff")
if not ks1.states or not ks2.states:
return 0
def _directed(source: frozenset[frozenset[str]], target: frozenset[frozenset[str]]) -> int:
return max(min(_hamming(s, t) for t in target) for s in source)
d1 = _directed(ks1.states, ks2.states)
d2 = _directed(ks2.states, ks1.states)
return max(d1, d2)
[docs]
@dataclass(frozen=True)
class DirectionalDistances:
"""Directional Hamming distances between two structures.
Attributes
----------
forward_mean : float
Mean min-distance from ks1 states to nearest ks2 state.
backward_mean : float
Mean min-distance from ks2 states to nearest ks1 state.
forward_max : int
Max min-distance from ks1 to ks2.
backward_max : int
Max min-distance from ks2 to ks1.
"""
forward_mean: float
backward_mean: float
forward_max: int
backward_max: int
[docs]
def directional_distances(
ks1: KnowledgeStructure,
ks2: KnowledgeStructure,
) -> DirectionalDistances:
"""Compute directional Hamming distances.
Raises
------
ValueError
If structures have different domains.
"""
_validate_same_domain_structures(ks1, ks2, "directional_distances")
def _min_distances(
source: frozenset[frozenset[str]], target: frozenset[frozenset[str]]
) -> list[int]:
return [min(_hamming(s, t) for t in target) for s in source]
fwd = _min_distances(ks1.states, ks2.states)
bwd = _min_distances(ks2.states, ks1.states)
return DirectionalDistances(
forward_mean=sum(fwd) / len(fwd) if fwd else 0.0,
backward_mean=sum(bwd) / len(bwd) if bwd else 0.0,
forward_max=max(fwd) if fwd else 0,
backward_max=max(bwd) if bwd else 0,
)
[docs]
def graph_edit_distance(
rel1: SurmiseRelation,
rel2: SurmiseRelation,
) -> tuple[int, int, int]:
"""Edge differences between two surmise relation graphs.
Returns
-------
added : int
Edges in rel2 but not in rel1.
removed : int
Edges in rel1 but not in rel2.
total : int
added + removed.
Raises
------
ValueError
If relations have different domains.
"""
_validate_same_domain_relations(rel1, rel2, "graph_edit_distance")
# Normalize to transitive closure so that equivalent partial orders
# (e.g. a→b→c vs a→b→c + a→c) produce the same result.
r1 = rel1.transitive_closure().relations
r2 = rel2.transitive_closure().relations
added = len(r2 - r1)
removed = len(r1 - r2)
return added, removed, added + removed