Skip to content

Mean Analysis

circumplex.analysis.mean_analysis

Mean-based SSM analysis.

This module implements mean-based SSM analysis with bootstrap confidence intervals, supporting single-group and multi-group designs with optional contrast analysis.

FUNCTION DESCRIPTION
ssm_analyze_means

Perform mean-based SSM analysis.

ssm_analyze_means

ssm_analyze_means(data: DataFrame, scales: list[str] | list[int], angles: ndarray, grouping: str | None = None, boots: int = 2000, interval: float = 0.95, *, contrast: bool = False, listwise: bool = True, seed: int | None = None) -> dict[str, Any]

Perform mean-based SSM analysis.

Calculates SSM parameters from mean scale scores, optionally stratified by group, with bootstrap confidence intervals. Supports contrast analysis for comparing two groups.

PARAMETER DESCRIPTION
data

DataFrame containing circumplex scale scores

TYPE: DataFrame

scales

Column names or indices for circumplex scales (length n_scales)

TYPE: list[str] | list[int]

angles

Angular positions in radians (length n_scales)

TYPE: ndarray

grouping

Column name for grouping variable. If None, analyzes all data as one group.

TYPE: str | None DEFAULT: None

contrast

If True, also calculate the difference between the two groups (second group minus first group); requires exactly 2 groups

TYPE: bool DEFAULT: False

boots

Number of bootstrap resamples

TYPE: int DEFAULT: 2000

interval

Confidence level (e.g., 0.95 for 95% CI)

TYPE: float DEFAULT: 0.95

listwise

If True, use listwise deletion. If False, use pairwise deletion.

TYPE: bool DEFAULT: True

seed

Random seed for reproducibility

TYPE: int | None DEFAULT: None

RETURNS DESCRIPTION
dict[str, Any]

Dictionary with keys:

  • results: DataFrame with parameters and confidence intervals
  • scores: DataFrame with mean scale scores per group
  • details: Dict with analysis metadata
  • type: mean
RAISES DESCRIPTION
ValueError

If contrast=True but the number of groups is not exactly 2

Examples:

>>> from circumplex.data import load_dataset
>>> from circumplex.utils.angles import OCTANTS, degrees_to_radians
>>> data = load_dataset('jz2017')
>>> angles = degrees_to_radians(OCTANTS)
>>> results = ssm_analyze_means(data, scales=['PA', 'BC', 'DE', 'FG',
...                                             'HI', 'JK', 'LM', 'NO'],
...                              angles=angles, boots=2000, seed=12345)
Notes

This function mirrors ssm_analyze_means() from the R package (R/ssm_analysis.R lines 179-276).

Source code in src/circumplex/analysis/mean_analysis.py
def ssm_analyze_means(  # noqa: PLR0915
    data: pd.DataFrame,
    scales: list[str] | list[int],
    angles: np.ndarray,
    grouping: str | None = None,
    boots: int = 2000,
    interval: float = 0.95,
    *,
    contrast: bool = False,
    listwise: bool = True,
    seed: int | None = None,
) -> dict[str, Any]:
    """Perform mean-based SSM analysis.

    Calculates SSM parameters from mean scale scores, optionally stratified
    by group, with bootstrap confidence intervals. Supports contrast analysis
    for comparing two groups.

    Parameters
    ----------
    data
        DataFrame containing circumplex scale scores
    scales
        Column names or indices for circumplex scales (length n_scales)
    angles
        Angular positions in radians (length n_scales)
    grouping
        Column name for grouping variable. If None, analyzes all data as one group.
    contrast
        If True, calculate difference between two groups (requires exactly 2 groups)
    boots
        Number of bootstrap resamples
    interval
        Confidence level (e.g., 0.95 for 95% CI)
    listwise
        If True, use listwise deletion. If False, use pairwise deletion.
    seed
        Random seed for reproducibility

    Returns
    -------
    :
        Dictionary with keys:

        - `results`: DataFrame with parameters and confidence intervals
        - `scores`: DataFrame with mean scale scores per group
        - `details`: Dict with analysis metadata
        - `type`: `mean`

    Raises
    ------
    ValueError
        If contrast=True but number of groups is not 2

    Examples
    --------
    >>> from circumplex.data import load_dataset
    >>> from circumplex.utils.angles import OCTANTS, degrees_to_radians
    >>> data = load_dataset('jz2017')
    >>> angles = degrees_to_radians(OCTANTS)
    >>> results = ssm_analyze_means(data, scales=['PA', 'BC', 'DE', 'FG',
    ...                                             'HI', 'JK', 'LM', 'NO'],
    ...                              angles=angles, boots=2000, seed=12345)

    Notes
    -----
    This function mirrors ssm_analyze_means() from the R package
    (R/ssm_analysis.R lines 179-276).

    """
    # Convert scale indices to names if needed
    if isinstance(scales[0], int):
        scale_names = [data.columns[i] for i in scales]
    else:
        scale_names = scales

    # Handle grouping
    if grouping is None:
        # Create synthetic "All" group
        group_labels = np.array(["All"] * len(data))
        group_col = "All"
        n_groups = 1
    else:
        group_col = data[grouping]
        # Convert to categorical and get labels
        if not isinstance(group_col.dtype, pd.CategoricalDtype):
            group_col = pd.Categorical(group_col)
        else:
            group_col = group_col.astype("category")

        group_labels = np.array(group_col)
        unique_groups = group_col.categories
        n_groups = len(unique_groups)

    # Validate contrast
    if contrast and n_groups != 2:
        msg = "Contrast can only be TRUE when comparing exactly 2 groups"
        raise ValueError(msg)

    # Prepare bootstrap input data
    bs_input = data[scale_names].copy()
    bs_input["__group__"] = group_labels

    # Apply listwise deletion if requested
    if listwise:
        bs_input = bs_input.dropna()

    # Convert groups to integer codes for internal use
    if grouping is None:
        group_codes = np.zeros(len(bs_input), dtype=int)
    else:
        group_categories = pd.Categorical(bs_input["__group__"])
        group_codes = group_categories.codes
        unique_groups = group_categories.categories

    # Calculate observed mean scores
    observed_scores = mean_scores(
        bs_input[scale_names].values, group_codes, listwise=listwise
    )

    # Add contrast row if requested
    if contrast:
        contrast_scores = observed_scores[1] - observed_scores[0]
        observed_scores = np.vstack([observed_scores, contrast_scores])

    # Define bootstrap function
    def bootstrap_fn(df: pd.DataFrame, indices: np.ndarray) -> np.ndarray:
        """Calculate SSM parameters for a bootstrap resample."""
        resampled = df.iloc[indices]
        scale_vals = resampled[scale_names].to_numpy()

        # Get group codes for resample
        if grouping is None:
            grp_codes = np.zeros(len(resampled), dtype=int)
        else:
            grp_categories = pd.Categorical(
                resampled["__group__"], categories=unique_groups
            )
            grp_codes = grp_categories.codes

        # Calculate mean scores for this resample
        scores_r = mean_scores(scale_vals, grp_codes, listwise=listwise)

        # Calculate SSM parameters
        params = group_parameters(scores_r, angles)

        # Add contrast if requested
        if contrast:
            contrast_params = param_diff_array(params)
            params = np.concatenate([params, contrast_params])

        return params

    # Perform bootstrap
    bs_results = ssm_bootstrap(
        bs_input,
        bootstrap_fn,
        boots=boots,
        grouping_col="__group__" if grouping is not None else None,
        seed=seed,
    )

    # Calculate confidence intervals
    results_df = calculate_confidence_intervals(bs_results)

    # Convert displacement from radians to degrees
    results_df["d_est"] = np.degrees(results_df["d_est"])
    if "d_lci" in results_df.columns:
        results_df["d_lci"] = np.degrees(results_df["d_lci"])
        results_df["d_uci"] = np.degrees(results_df["d_uci"])

    # Add metadata columns
    if grouping is None:
        results_df.insert(0, "Label", ["All"])
        group_labels_list = ["All"]
    else:
        group_labels_list = list(unique_groups)
        if contrast:
            contrast_label = f"{group_labels_list[1]} - {group_labels_list[0]}"
            group_labels_list.append(contrast_label)
        results_df.insert(0, "Label", group_labels_list)

    results_df.insert(1, "Group", group_labels_list)
    results_df.insert(2, "Measure", [None] * len(results_df))

    # Create scores DataFrame
    scores_df = pd.DataFrame(observed_scores, columns=scale_names)
    scores_df.insert(0, "Label", group_labels_list)

    # Prepare details
    details = {
        "boots": boots,
        "interval": interval,
        "listwise": listwise,
        "angles": np.degrees(angles).tolist(),
        "contrast": contrast,
        "score_type": "mean",
    }

    return {
        "results": results_df,
        "scores": scores_df,
        "details": details,
        "type": "mean",
    }