Skip to content

rthor._validation

Input validation functions for RTHOR analyses.

FUNCTION DESCRIPTION
validate_correlation_matrix

Validate that a matrix is a valid correlation matrix.

validate_order

Validate and process order specification.

validate_labels

Validate and process matrix labels.

validate_filepath

Validate that the filepath exists and points to a file.

validate_dataframe_list

Validate list of DataFrames for RTHOR analysis.

validate_correlation_matrices_3d

Validate 3D array of correlation matrices.

validate_correlation_matrix

validate_correlation_matrix(matrix: ndarray, *, matrix_id: int | None = None) -> None

Validate that a matrix is a valid correlation matrix.

PARAMETER DESCRIPTION
matrix

Matrix to validate

TYPE: ndarray

matrix_id

Matrix identifier for error messages

TYPE: int | None DEFAULT: None

RAISES DESCRIPTION
ValueError

If matrix is not a valid correlation matrix

Source code in src/rthor/_validation.py
def validate_correlation_matrix(
    matrix: np.ndarray, *, matrix_id: int | None = None
) -> None:
    """Validate that a matrix is a valid correlation matrix.

    Args:
        matrix: Matrix to validate
        matrix_id: Matrix identifier for error messages

    Raises:
        ValueError: If matrix is not a valid correlation matrix

    """
    prefix = f"Matrix {matrix_id}: " if matrix_id is not None else ""

    if matrix.ndim != 2:
        msg = f"{prefix}Expected 2D array, got {matrix.ndim}D"
        raise ValueError(msg)

    if matrix.shape[0] != matrix.shape[1]:
        msg = f"{prefix}Expected square matrix, got shape {matrix.shape}"
        raise ValueError(msg)

    if matrix.shape[0] < 2:
        msg = (
            f"{prefix}Matrix must be at least 2x2, "
            f"got {matrix.shape[0]}x{matrix.shape[0]}"
        )
        raise ValueError(msg)

    # Check symmetry
    if not np.allclose(matrix, matrix.T, rtol=1e-10, atol=1e-12):
        msg = f"{prefix}Matrix is not symmetric"
        raise ValueError(msg)

    # Check diagonal is all 1.0
    if not np.allclose(np.diag(matrix), 1.0, rtol=1e-10, atol=1e-12):
        msg = f"{prefix}Correlation matrix diagonal must be 1.0"
        raise ValueError(msg)

    # Check values in valid range
    if not np.all((matrix >= -1 - 1e-10) & (matrix <= 1 + 1e-10)):
        msg = f"{prefix}Correlation values must be in [-1, 1]"
        raise ValueError(msg)

validate_order

validate_order(order: str | list[int] | ndarray, n_variables: int) -> np.ndarray

Validate and process order specification.

PARAMETER DESCRIPTION
order

Order specification. Can be:

  • "circular6": Preset for 6-variable circular model
  • "circular8": Preset for 8-variable circular model
  • Custom array/list of integers

TYPE: str | list[int] | ndarray

n_variables

Number of variables in correlation matrices

TYPE: int

RETURNS DESCRIPTION
order_array

Processed order array (int32)

TYPE: ndarray

RAISES DESCRIPTION
ValueError

If order specification is invalid

Source code in src/rthor/_validation.py
def validate_order(
    order: str | list[int] | np.ndarray,
    n_variables: int,
) -> np.ndarray:
    """Validate and process order specification.

    Args:
        order: Order specification. Can be:

            - "circular6": Preset for 6-variable circular model
            - "circular8": Preset for 8-variable circular model
            - Custom array/list of integers
        n_variables: Number of variables in correlation matrices

    Returns:
        order_array: Processed order array (int32)

    Raises:
        ValueError: If order specification is invalid

    """
    # Handle preset strings
    if isinstance(order, str):
        if order not in _PRESET_ORDERS:
            msg = (
                f"Unknown order preset: '{order}'. "
                f"Valid presets: {', '.join(_PRESET_ORDERS.keys())}"
            )
            raise ValueError(msg)

        order_array = _PRESET_ORDERS[order]

        # Verify preset matches n_variables
        expected_length = (n_variables * (n_variables - 1)) // 2
        if len(order_array) != expected_length:
            # Give helpful error for common case
            if order == "circular6" and n_variables != 6:
                msg = (
                    f"Order preset 'circular6' is for 6 variables, "
                    f"but data has {n_variables} variables"
                )
            elif order == "circular8" and n_variables != 8:
                msg = (
                    f"Order preset 'circular8' is for 8 variables, "
                    f"but data has {n_variables} variables"
                )
            else:
                msg = (
                    f"Order preset '{order}' length {len(order_array)} "
                    f"doesn't match data ({n_variables} variables requires "
                    f"length {expected_length})"
                )
            raise ValueError(msg)

        return order_array

    # Handle custom order
    order_array = np.asarray(order, dtype=np.int32)

    if order_array.ndim != 1:
        msg = f"Order must be 1D array, got {order_array.ndim}D"
        raise ValueError(msg)

    expected_length = (n_variables * (n_variables - 1)) // 2
    if len(order_array) != expected_length:
        msg = (
            f"Order length {len(order_array)} doesn't match expected length "
            f"{expected_length} for {n_variables} variables. "
            f"Order must have length n*(n-1)/2 where n is number of variables."
        )
        raise ValueError(msg)

    return order_array

validate_labels

validate_labels(labels: list[str] | None, n_matrices: int) -> list[str]

Validate and process matrix labels.

PARAMETER DESCRIPTION
labels

Matrix labels, or None to auto-generate

TYPE: list[str] | None

n_matrices

Number of matrices

TYPE: int

RETURNS DESCRIPTION
labels

Processed labels (a list of empty strings, one per matrix, if input was None)

TYPE: list[str]

RAISES DESCRIPTION
ValueError

If number of labels doesn't match number of matrices

Source code in src/rthor/_validation.py
def validate_labels(
    labels: list[str] | None,
    n_matrices: int,
) -> list[str]:
    """Validate and process matrix labels.

    Args:
        labels: Matrix labels, or None to auto-generate
        n_matrices: Number of matrices

    Returns:
        labels: Processed labels (auto-generated if input was None)

    Raises:
        ValueError: If number of labels doesn't match number of matrices

    """
    if labels is None:
        return ["" for _ in range(n_matrices)]

    if len(labels) != n_matrices:
        msg = (
            f"Number of labels ({len(labels)}) doesn't match "
            f"number of matrices ({n_matrices})"
        )
        raise ValueError(msg)

    return labels

validate_filepath

validate_filepath(filepath: Path | str) -> Path

Validate that the filepath exists and points to a file.

PARAMETER DESCRIPTION
filepath

Path to file

TYPE: Path | str

RETURNS DESCRIPTION
path

Validated Path object

TYPE: Path

RAISES DESCRIPTION
FileNotFoundError

If file doesn't exist

ValueError

If path is not a file

Source code in src/rthor/_validation.py
def validate_filepath(filepath: Path | str) -> Path:
    """Validate filepath exists and is readable.

    Args:
        filepath: Path to file

    Returns:
        path: Validated Path object

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If path is not a file

    """
    path = Path(filepath)

    if not path.exists():
        msg = f"File not found: {path}"
        raise FileNotFoundError(msg)

    if not path.is_file():
        msg = f"Path is not a file: {path}"
        raise ValueError(msg)

    return path

validate_dataframe_list

validate_dataframe_list(df_list: list[DataFrame]) -> None

Validate list of DataFrames for RTHOR analysis.

PARAMETER DESCRIPTION
df_list

List of DataFrames

TYPE: list[DataFrame]

RAISES DESCRIPTION
ValueError

If DataFrames are invalid or inconsistent

Source code in src/rthor/_validation.py
def validate_dataframe_list(df_list: list[pd.DataFrame]) -> None:
    """Check that a list of DataFrames is usable for RTHOR analysis.

    The list must be non-empty and hold only pandas DataFrames that share
    one column count of at least 2. Each DataFrame must additionally have
    at least 2 rows and only numeric columns.

    Args:
        df_list: List of DataFrames

    Raises:
        ValueError: If DataFrames are invalid or inconsistent

    """
    if not df_list:
        msg = "DataFrame list is empty"
        raise ValueError(msg)

    for candidate in df_list:
        if not isinstance(candidate, pd.DataFrame):
            msg = "All elements must be pandas DataFrames"
            raise ValueError(msg)

    # Every DataFrame is compared against the column count of the first one
    column_counts = [len(frame.columns) for frame in df_list]
    n_cols = column_counts[0]
    if any(count != n_cols for count in column_counts):
        msg = "All DataFrames must have the same number of columns"
        raise ValueError(msg)

    if n_cols < 2:
        msg = f"DataFrames must have at least 2 columns, got {n_cols}"
        raise ValueError(msg)

    # Per-DataFrame checks; positions in messages are 1-based
    for position, frame in enumerate(df_list, start=1):
        n_rows = len(frame)
        if n_rows < 2:
            msg = (
                f"DataFrame {position} has insufficient rows ({n_rows}) "
                f"for correlation analysis"
            )
            raise ValueError(msg)

        # Anything that is not a NumPy numeric dtype counts as non-numeric
        non_numeric = frame.select_dtypes(exclude=[np.number]).columns.tolist()
        if non_numeric:
            msg = (
                f"DataFrame {position} has non-numeric columns: {non_numeric}. "
                f"All columns must be numeric for correlation analysis."
            )
            raise ValueError(msg)

validate_correlation_matrices_3d

validate_correlation_matrices_3d(matrices: ndarray) -> None

Validate 3D array of correlation matrices.

PARAMETER DESCRIPTION
matrices

3D array of shape (n, n, n_matrices)

TYPE: ndarray

RAISES DESCRIPTION
ValueError

If array is invalid

Source code in src/rthor/_validation.py
def validate_correlation_matrices_3d(
    matrices: np.ndarray,
) -> None:
    """Check a stack of correlation matrices held in one 3D array.

    The first two axes must have equal length; each slice along the third
    axis is then passed to ``validate_correlation_matrix`` with a 1-based
    ``matrix_id``.

    Args:
        matrices: 3D array of shape (n, n, n_matrices)

    Raises:
        ValueError: If array is invalid

    """
    n_dims = matrices.ndim
    if n_dims != 3:
        msg = f"Expected 3D array, got {n_dims}D"
        raise ValueError(msg)

    n_rows, n_cols, n_matrices = matrices.shape
    if n_rows != n_cols:
        msg = (
            f"Expected square matrices (nxnxm), "
            f"got {n_rows}x{n_cols}x{n_matrices}"
        )
        raise ValueError(msg)

    # Validate each slice; ids start at 1 so messages read "Matrix 1: ..."
    for matrix_id in range(1, n_matrices + 1):
        validate_correlation_matrix(
            matrices[:, :, matrix_id - 1], matrix_id=matrix_id
        )

:::