rthor._vectorized

Vectorized operations for RTHOR algorithm.

This module contains high-performance vectorized implementations of core RTHOR operations, replacing nested loops from the original RTHORR implementation with NumPy broadcasting.

FUNCTION	DESCRIPTION
`build_comparison_matrix`	Build comparison matrix from correlation vector.
`count_agreements`	Count agreements and ties between comparison and hypothesis matrices.
`build_hypothesis_matrix`	Build hypothesis matrix from order array.
`extract_upper_triangle_vector`	Extract upper triangle of matrix as vector (row-major order).
`count_pairwise_agreements`	Count agreement patterns between two matrices.
`calculate_comparison_ci`	Calculate Correspondence Index for pairwise comparison.

build_comparison_matrix

build_comparison_matrix(correlations_vector: ndarray) -> np.ndarray

Build comparison matrix from correlation vector.

Notes

Performance: O(1) broadcast vs O(n²) nested loops. Original R code (randall.R:135-140) used nested loops.

PARAMETER	DESCRIPTION
`correlations_vector`	1D array of correlation values from upper triangle TYPE: `ndarray`

RETURNS	DESCRIPTION
`ndarray`	Matrix where: comparison_matrix[i,j] = 1 if correlations_vector[j] > correlations_vector[i] comparison_matrix[i,j] = 2 if correlations_vector[j] == correlations_vector[i] comparison_matrix[i,j] = 0 if correlations_vector[j] < correlations_vector[i]

Examples:

>>> corr = np.array([0.8, 0.6, 0.7])
>>> comp = build_comparison_matrix(corr)
>>> comp.shape
(3, 3)

Source code in src/rthor/_vectorized.py

def build_comparison_matrix(correlations_vector: np.ndarray) -> np.ndarray:
    """Build comparison matrix from correlation vector.

    Notes:
        Performance: O(1) broadcast vs O(n²) nested loops.
        Original R code (`randall.R:135-140`) used nested loops.

    Args:
        correlations_vector: 1D array of correlation values from upper triangle

    Returns:
        Matrix where:

            - comparison_matrix[i,j] = 1 if correlations_vector[j] > correlations_vector[i]
            - comparison_matrix[i,j] = 2 if correlations_vector[j] == correlations_vector[i]
            - comparison_matrix[i,j] = 0 if correlations_vector[j] < correlations_vector[i]

    Examples:
        >>> corr = np.array([0.8, 0.6, 0.7])
        >>> comp = build_comparison_matrix(corr)
        >>> comp.shape
        (3, 3)

    """  # noqa: E501
    # Reshape for broadcasting: column vector and row vector
    corr_i = correlations_vector[:, np.newaxis]  # Shape: (n, 1)
    corr_j = correlations_vector[np.newaxis, :]  # Shape: (1, n)

    # Vectorized comparison using np.where
    # This broadcasts to (n, n) automatically
    return np.where(
        corr_j > corr_i,
        1,
        np.where(corr_j == corr_i, 2, 0),
    ).astype(np.int32)

count_agreements

count_agreements(comparison_matrix: ndarray, hypothesis_matrix: ndarray) -> tuple[int, int]

Count agreements and ties between comparison and hypothesis matrices.

Notes

Performance: O(1) boolean indexing vs O(n²) nested loops. Original R code (randall.R:143-146) used nested loops.

PARAMETER	DESCRIPTION
`comparison_matrix`	Comparison matrix from build_comparison_matrix() TYPE: `ndarray`
`hypothesis_matrix`	Hypothesis matrix (1 where prediction exists, 0 otherwise) TYPE: `ndarray`

RETURNS	DESCRIPTION
`n_agreements`	Number of agreements (comparison==1 and hypothesis==1) TYPE: `int`
`n_ties`	Number of ties (comparison==2 and hypothesis==1) TYPE: `int`

Examples:

>>> comp = np.array([[0, 1, 0], [0, 0, 2], [1, 0, 0]])
>>> hyp = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
>>> n_agr, n_tie = count_agreements(comp, hyp)

Source code in src/rthor/_vectorized.py

def count_agreements(
    comparison_matrix: np.ndarray,
    hypothesis_matrix: np.ndarray,
) -> tuple[int, int]:
    """Count agreements and ties between comparison and hypothesis matrices.

    Notes:
        Performance: O(1) boolean indexing vs O(n²) nested loops.
        Original R code (`randall.R:143-146`) used nested loops.

    Args:
        comparison_matrix: Comparison matrix from build_comparison_matrix()
        hypothesis_matrix: Hypothesis matrix (1 where prediction exists, 0 otherwise)

    Returns:
        n_agreements: Number of agreements (comparison==1 and hypothesis==1)
        n_ties: Number of ties (comparison==2 and hypothesis==1)

    Examples:
        >>> comp = np.array([[0, 1, 0], [0, 0, 2], [1, 0, 0]])
        >>> hyp = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
        >>> n_agr, n_tie = count_agreements(comp, hyp)

    """
    # Create boolean masks for hypothesized predictions
    hypothesis_mask = hypothesis_matrix == 1

    # Count agreements: where both comparison and hypothesis are 1
    n_agreements = int(np.sum((comparison_matrix == 1) & hypothesis_mask))

    # Count ties: where comparison is 2 (tie) and hypothesis is 1
    n_ties = int(np.sum((comparison_matrix == 2) & hypothesis_mask))

    return n_agreements, n_ties

build_hypothesis_matrix

build_hypothesis_matrix(order_array: ndarray) -> np.ndarray

Build hypothesis matrix from order array.

Notes

Performance: O(1) broadcast vs O(n²) nested loops. Original R code (randall.R:71-79) used nested loops.

PARAMETER	DESCRIPTION
`order_array`	1D array specifying hypothesized ordering TYPE: `ndarray`

RETURNS	DESCRIPTION
`hypothesis_matrix`	Matrix where hypothesis_matrix[i,j] = 1 if order_array[j] < order_array[i] TYPE: `ndarray`

Examples:

>>> order = np.array([1, 2, 3, 2, 1])
>>> hyp = build_hypothesis_matrix(order)
>>> hyp.shape
(5, 5)

Source code in src/rthor/_vectorized.py

def build_hypothesis_matrix(order_array: np.ndarray) -> np.ndarray:
    """Build hypothesis matrix from order array.

    Notes:
        Performance: O(1) broadcast vs O(n²) nested loops.
        Original R code (`randall.R:71-79`) used nested loops.

    Args:
        order_array: 1D array specifying hypothesized ordering

    Returns:
        hypothesis_matrix: Matrix where hypothesis_matrix[i,j] = 1
            if order_array[j] < order_array[i]

    Examples:
        >>> order = np.array([1, 2, 3, 2, 1])
        >>> hyp = build_hypothesis_matrix(order)
        >>> hyp.shape
        (5, 5)

    """
    # Reshape for broadcasting
    order_i = order_array[:, np.newaxis]  # Column vector
    order_j = order_array[np.newaxis, :]  # Row vector

    return (order_j < order_i).astype(np.int32)

extract_upper_triangle_vector

extract_upper_triangle_vector(matrix: ndarray) -> np.ndarray

Extract upper triangle of matrix as vector (row-major order).

Notes

This matches R's row-major ordering for consistency with RTHORR implementation. Extracts in the same order as R code (randall.R:126-131) for exact parity.

Uses row-major traversal: \((0,1), (0,2), ..., (0,n-1), (1,2), ..., (n-2,n-1)\)

PARAMETER	DESCRIPTION
`matrix`	2D square matrix (n x n) TYPE: `ndarray`

RETURNS	DESCRIPTION
`ndarray`	1D array of upper triangle values (length = n*(n-1)/2)

Examples:

>>> mat = np.array([[1.0, 0.8, 0.6],
...                 [0.8, 1.0, 0.7],
...                 [0.6, 0.7, 1.0]])
>>> vec = extract_upper_triangle_vector(mat)
>>> vec
array([0.8, 0.6, 0.7])

Source code in src/rthor/_vectorized.py

def extract_upper_triangle_vector(matrix: np.ndarray) -> np.ndarray:
    """Extract upper triangle of matrix as vector (row-major order).

    Notes:
        This matches R's row-major ordering for consistency with RTHORR implementation.
        Extracts in the same order as R code (`randall.R:126-131`) for exact parity.

        Uses row-major traversal: $(0,1), (0,2), ..., (0,n-1), (1,2), ..., (n-2,n-1)$

    Args:
        matrix: 2D square matrix (n x n)

    Returns:
        1D array of upper triangle values (length = n*(n-1)/2)

    Examples:
        >>> mat = np.array([[1.0, 0.8, 0.6],
        ...                 [0.8, 1.0, 0.7],
        ...                 [0.6, 0.7, 1.0]])
        >>> vec = extract_upper_triangle_vector(mat)
        >>> vec
        array([0.8, 0.6, 0.7])

    """
    n = matrix.shape[0]
    n_pairs = (n * (n - 1)) // 2

    vector = np.zeros(n_pairs, dtype=matrix.dtype)
    idx = 0

    # Row-major order to match R
    for i in range(n):
        for j in range(n):
            if i >= j:
                continue
            vector[idx] = matrix[i, j]
            idx += 1

    return vector

count_pairwise_agreements

count_pairwise_agreements(comp_mat1: ndarray, comp_mat2: ndarray, hypothesis_matrix: ndarray) -> tuple[int, int, int, int]

Count agreement patterns between two matrices.

The logic checks for each hypothesized prediction (hypothesis_matrix==1):

\(m1 = 1\) if comp_mat1 agrees (==1)
\(m2 = 1\) if comp_mat2 agrees (==1)

Then counts: \((m1=1,m2=1), (m1=1,m2=0), (m1=0,m2=1), (m1=0,m2=0)\)

Notes

Vectorized replacement for quadruple nested loops in randmf().

Performance: O(1) vs O(n⁴) nested loops. Original R code (randmf.R:256-280) used 4 nested loops.

PARAMETER	DESCRIPTION
`comp_mat1`	Comparison matrix for first correlation matrix TYPE: `ndarray`
`comp_mat2`	Comparison matrix for second correlation matrix TYPE: `ndarray`
`hypothesis_matrix`	Hypothesis matrix TYPE: `ndarray`

RETURNS	DESCRIPTION
`both_agree`	Count where both matrices satisfy hypothesis TYPE: `int`
`only1`	Count where only matrix 1 satisfies hypothesis TYPE: `int`
`only2`	Count where only matrix 2 satisfies hypothesis TYPE: `int`
`neither`	Count where neither matrix satisfies hypothesis TYPE: `int`

Source code in src/rthor/_vectorized.py

def count_pairwise_agreements(
    comp_mat1: np.ndarray,
    comp_mat2: np.ndarray,
    hypothesis_matrix: np.ndarray,
) -> tuple[int, int, int, int]:
    """Count agreement patterns between two matrices.

    The logic checks for each hypothesized prediction (hypothesis_matrix==1):

    - $m1 = 1$ if `comp_mat1` agrees (==1)
    - $m2 = 1$ if `comp_mat2` agrees (==1)

    Then counts: $(m1=1,m2=1), (m1=1,m2=0), (m1=0,m2=1), (m1=0,m2=0)$

    Notes:
        Vectorized replacement for quadruple nested loops in `randmf()`.

        Performance: O(1) vs O(n⁴) nested loops.
        Original R code (`randmf.R:256-280`) used 4 nested loops.

    Args:
        comp_mat1: Comparison matrix for first correlation matrix
        comp_mat2: Comparison matrix for second correlation matrix
        hypothesis_matrix: Hypothesis matrix

    Returns:
        both_agree: Count where both matrices satisfy hypothesis
        only1: Count where only matrix 1 satisfies hypothesis
        only2: Count where only matrix 2 satisfies hypothesis
        neither: Count where neither matrix satisfies hypothesis

    """
    # Create masks for hypothesized predictions
    hypothesis_mask = hypothesis_matrix == 1

    # For hypothesized predictions, check if each matrix agrees (==1) or disagrees (==0)
    # Note: ties (==2) are NOT counted as either agreement or disagreement
    m1_agrees = (comp_mat1 == 1) & hypothesis_mask
    m1_disagrees = (comp_mat1 == 0) & hypothesis_mask
    m2_agrees = (comp_mat2 == 1) & hypothesis_mask
    m2_disagrees = (comp_mat2 == 0) & hypothesis_mask

    # Count patterns using boolean operations
    # Both agree: m1==1 AND m2==1
    both_agree = int(np.sum(m1_agrees & m2_agrees))
    # Only1 agrees: m1==1 AND m2==0 (not m2==1 or m2==2)
    only1 = int(np.sum(m1_agrees & m2_disagrees))
    # Only2 agrees: m1==0 AND m2==1
    only2 = int(np.sum(m1_disagrees & m2_agrees))
    # Neither agrees: m1==0 AND m2==0
    neither = int(np.sum(m1_disagrees & m2_disagrees))

    return both_agree, only1, only2, neither

calculate_comparison_ci

calculate_comparison_ci(only1: int, only2: int, both_agree: int, neither: int) -> float

Calculate Correspondence Index for pairwise comparison.

Formula from R code (randmf.R:205):

\[ CI = \frac{\text{only2} - \text{only1}}{\text{both} + \text{only1} + \text{only2} + \text{neither}} \]

PARAMETER	DESCRIPTION
`only1`	Count where only matrix 1 agrees TYPE: `int`
`only2`	Count where only matrix 2 agrees TYPE: `int`
`both_agree`	Count where both agree TYPE: `int`
`neither`	Count where neither agrees TYPE: `int`

RETURNS	DESCRIPTION
`float`	Comparison CI

Source code in src/rthor/_vectorized.py

def calculate_comparison_ci(
    only1: int,
    only2: int,
    both_agree: int,
    neither: int,
) -> float:
    r"""Calculate Correspondence Index for pairwise comparison.

    Formula from R code (`randmf.R:205`):

    $$
    CI = \frac{\text{only2} - \text{only1}}{\text{both} + \text{only1} + \text{only2} + \text{neither}}
    $$

    Args:
        only1: Count where only matrix 1 agrees
        only2: Count where only matrix 2 agrees
        both_agree: Count where both agree
        neither: Count where neither agrees

    Returns:
        Comparison CI

    """  # noqa: E501
    denominator = both_agree + only1 + only2 + neither
    if denominator == 0:
        return 0.0

    return (only2 - only1) / denominator

:::