Tidying Functions¶

circumplex.utils.tidying_functions ¶

Utility functions for data tidying and instrument scoring.

FUNCTION	DESCRIPTION
`ipsatize`	Ipsatize item-level data by centering within individuals.
`score`	Score item-level data using a circumplex instrument.
`norm_standardize`	Standardize scale-level data using normative sample statistics.

ipsatize ¶

ipsatize(data: DataFrame, items: Iterable[str | int], prefix: str = '', suffix: str = '_i', *, na_rm: bool = True, append: bool = True) -> pd.DataFrame

Ipsatize item-level data by centering within individuals.

PARAMETER	DESCRIPTION
`data`	DataFrame containing item-level data. TYPE: `DataFrame`
`items`	Column names or positional column indices corresponding to item-level data to ipsatize. TYPE: `Iterable[str \| int]`
`prefix`	Prefix to add to ipsatized column names, by default "". TYPE: `str` DEFAULT: `''`
`suffix`	Suffix to add to ipsatized column names, by default "_i". TYPE: `str` DEFAULT: `'_i'`
`na_rm`	Whether to remove NAs when computing individual means, by default True. TYPE: `bool` DEFAULT: `True`
`append`	Whether to append ipsatized columns to the original DataFrame, or return only the ipsatized columns, by default True. TYPE: `bool` DEFAULT: `True`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with ipsatized item-level data.

RAISES	DESCRIPTION
`TypeError`	If `data` is not a DataFrame or `items` is not a sequence.
`ValueError`	If any item in `items` is not a column in `data`.

Source code in src/circumplex/utils/tidying_functions.py

def ipsatize(
    data: pd.DataFrame,
    items: Iterable[str | int],
    prefix: str = "",
    suffix: str = "_i",
    *,
    na_rm: bool = True,
    append: bool = True,
) -> pd.DataFrame:
    """Ipsatize item-level data by centering within individuals.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame containing item-level data.
    items : tuple[str]
        Tuple of column names corresponding to item-level data to ipsatize.
    prefix : str, optional
        Prefix to add to ipsatized column names, by default "".
    suffix : str, optional
        Suffix to add to ipsatized column names, by default "".
    na_rm : bool, optional
        Whether to remove NAs when computing individual means, by default True.
    append : bool, optional
        Whether to append ipsatized columns to the original DataFrame,
        or return only the ipsatized columns, by default True.

    Returns
    -------
    :
        DataFrame with ipsatized item-level data.

    Raises
    ------
    TypeError
        If `data` is not a DataFrame or `items` is not a sequence.
    ValueError
        If any item in `items` is not a column in `data`.
    """
    if not isinstance(data, pd.DataFrame):
        msg = "Input 'data' must be a pandas DataFrame."
        raise TypeError(msg)
    if isinstance(items, str):
        msg = "Input 'items' must be a sequence of column names."
        raise TypeError(msg)
    if not isinstance(items, Iterable):
        msg = "Input 'items' must be a sequence of column names."
        raise TypeError(msg)

    if all(isinstance(item, str) for item in items):
        if all(item in data.columns for item in items):
            item_data = data.loc[:, list(items)].copy()
        else:
            msg = "All items in 'items' must be valid column names in 'data'."
            raise ValueError(msg)
    elif all(isinstance(item, (int, np.integer, float, np.floating)) for item in items):
        numeric_items = [int(item) for item in items]
        if all(0 <= idx < data.shape[1] for idx in numeric_items):
            item_data = data.iloc[:, numeric_items].copy()
        else:
            msg = "All items in 'items' must be valid indices in 'data'."
            raise ValueError(msg)
    else:
        msg = "All items in 'items' must be either strings or integers."
        raise TypeError(msg)

    rmean = item_data.mean(axis=1, skipna=na_rm)
    scores = item_data.subtract(rmean, axis=0)
    scores.columns = [f"{prefix}{item}{suffix}" for item in items]

    if append:
        return pd.concat([data, scores], axis=1)
    return scores

score ¶

score(data: DataFrame, items: Iterable[str | int], instrument: Instrument | str, prefix: str = '', suffix: str = '', *, na_rm: bool = True, append: bool = True) -> pd.DataFrame

Score item-level data using a circumplex instrument.

PARAMETER	DESCRIPTION
`data`	DataFrame containing at least circumplex scales. TYPE: `DataFrame`
`items`	The variable names or column numbers for the variables in `data` that contain all the circumplex items from a single circumplex measure, in ascending order from item 1 to item N. TYPE: `Iterable[str \| int]`
`instrument`	An instrument object from the package, or the name of one as a string. To see the available instruments, use `show_instruments()`. TYPE: `Instrument \| str`
`prefix`	Prefix to add to scored column names, by default "". TYPE: `str` DEFAULT: `''`
`suffix`	Suffix to add to scored column names, by default "". TYPE: `str` DEFAULT: `''`
`na_rm`	Whether to remove NAs when computing individual means, by default True. TYPE: `bool` DEFAULT: `True`
`append`	Whether to append scored columns to the original DataFrame, or return only the scored columns, by default True. TYPE: `bool` DEFAULT: `True`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with scored scale-level data.

RAISES	DESCRIPTION
`TypeError`	If `data` is not a DataFrame or `items` is not a sequence.
`ValueError`	If any item in `items` is not a column in `data`.

Source code in src/circumplex/utils/tidying_functions.py

def score(
    data: pd.DataFrame,
    items: Iterable[str | int],
    instrument: Instrument | str,
    prefix: str = "",
    suffix: str = "",
    *,
    na_rm: bool = True,
    append: bool = True,
) -> pd.DataFrame:
    """Score item-level data using a circumplex instrument.

    This is a validating front end: after checking its arguments it hands
    the actual scoring to the ``score`` method of the resolved instrument.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame holding the item-level columns to be scored.
    items : Iterable[str | int]
        The variable names or column numbers for the variables in `data`
        that contain all the circumplex items from a single circumplex
        measure, in ascending order from item 1 to item N.
    instrument : Instrument | str
        An instrument object from the package, or a string that is resolved
        through ``get_instrument``. To see the available instruments, use
        `show_instruments()`.
    prefix : str, optional
        Prefix to add to scored column names, by default "".
    suffix : str, optional
        Suffix to add to scored column names, by default "".
    na_rm : bool, optional
        Whether to remove NAs when computing individual means, by default
        True. Forwarded unchanged to the instrument.
    append : bool, optional
        Whether to append scored columns to the original DataFrame,
        or return only the scored columns, by default True.

    Returns
    -------
    :
        DataFrame with scored scale-level data, as returned by the
        instrument's ``score`` method.

    Raises
    ------
    TypeError
        If `data` is not a DataFrame, if `items` is a string or is not
        iterable, or if `instrument` does not resolve to an Instrument.
    ValueError
        If any item in `items` is not a column in `data`.
    """
    # Argument checks run before the instrument lookup so that malformed
    # `data` / `items` are reported first.
    if not isinstance(data, pd.DataFrame):
        data_error = "Input 'data' must be a pandas DataFrame."
        raise TypeError(data_error)
    if isinstance(items, str):
        # A lone string is iterable but is not a collection of columns.
        items_error = "Input 'items' must be a sequence of column names."
        raise TypeError(items_error)
    if not isinstance(items, Iterable):
        items_error = "Input 'items' must be a sequence of column names or indices."
        raise TypeError(items_error)

    # Resolve a name to an instrument object; anything else passes through.
    resolved = get_instrument(instrument) if isinstance(instrument, str) else instrument
    if not isinstance(resolved, Instrument):
        instrument_error = "Input 'instrument' must be an Instrument instance."
        raise TypeError(instrument_error)

    options = {
        "prefix": prefix,
        "suffix": suffix,
        "na_rm": na_rm,
        "append": append,
    }
    return resolved.score(data, items, **options)

norm_standardize ¶

norm_standardize(data: DataFrame, instrument: Instrument | str, sample_id: int, scales: Iterable[str | int] | None = None, prefix: str = '', suffix: str = '_z', *, append: bool = True) -> pd.DataFrame

Standardize scale-level data using normative sample statistics.

PARAMETER	DESCRIPTION
`data`	DataFrame containing scale-level data. TYPE: `DataFrame`
`scales`	Tuple of column names or indices corresponding to scale-level data to standardize. TYPE: `tuple[str \| int]` DEFAULT: `None`
`instrument`	An instrument object from the package. To see the available instruments, use `show_instruments()`. TYPE: `Instrument \| str`
`sample_id`	The ID of the normative sample to use for standardization. TYPE: `int \| str`
`prefix`	Prefix to add to standardized column names, by default "". TYPE: `str` DEFAULT: `''`
`suffix`	Suffix to add to standardized column names, by default "_z". TYPE: `str` DEFAULT: `'_z'`
`append`	Whether to append standardized columns to the original DataFrame, or return only the standardized columns, by default True. TYPE: `bool` DEFAULT: `True`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with standardized scale-level data.

RAISES	DESCRIPTION
`TypeError`	If `data` is not a DataFrame or `scales` is not a sequence.
`ValueError`	If any scale in `scales` is not a column in `data`.

Source code in src/circumplex/utils/tidying_functions.py

def norm_standardize(
    data: pd.DataFrame,
    instrument: Instrument | str,
    sample_id: int,
    scales: Iterable[str | int] | None = None,
    prefix: str = "",
    suffix: str = "_z",
    *,
    append: bool = True,
) -> pd.DataFrame:
    """Standardize scale-level data using normative sample statistics.

    This is a validating front end: after checking its arguments it hands
    the work to the ``norm_standardize`` method of the resolved instrument.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame containing scale-level data.
    instrument : Instrument | str
        An instrument object from the package, or a string that is resolved
        through ``get_instrument``. To see the available instruments, use
        `show_instruments()`.
    sample_id : int
        The ID of the normative sample to use for standardization. The
        value is converted with ``int()`` before it is forwarded.
    scales : Iterable[str | int] | None, optional
        Column names or indices corresponding to scale-level data to
        standardize. ``None`` (the default) is forwarded to the instrument
        unchanged.
    prefix : str, optional
        Prefix to add to standardized column names, by default "".
    suffix : str, optional
        Suffix to add to standardized column names, by default "_z".
    append : bool, optional
        Whether to append standardized columns to the original DataFrame,
        or return only the standardized columns, by default True.

    Returns
    -------
    :
        DataFrame with standardized scale-level data, as returned by the
        instrument's ``norm_standardize`` method.

    Raises
    ------
    TypeError
        If `data` is not a DataFrame, if `scales` is a string or a
        non-iterable other than None, or if `instrument` does not resolve
        to an Instrument.
    ValueError
        If any scale in `scales` is not a column in `data`.
    """
    # Argument checks run before the instrument lookup so that malformed
    # `data` / `scales` are reported first.
    if not isinstance(data, pd.DataFrame):
        data_error = "Input 'data' must be a pandas DataFrame."
        raise TypeError(data_error)
    if isinstance(scales, str):
        # A lone string is iterable but is not a collection of columns.
        scales_error = "Input 'scales' must be a sequence of column names."
        raise TypeError(scales_error)
    if scales is not None and not isinstance(scales, Iterable):
        scales_error = "Input 'scales' must be a sequence of column names or indices."
        raise TypeError(scales_error)

    # Resolve a name to an instrument object; anything else passes through.
    resolved = get_instrument(instrument) if isinstance(instrument, str) else instrument
    if not isinstance(resolved, Instrument):
        instrument_error = "Input 'instrument' must be an Instrument instance."
        raise TypeError(instrument_error)

    options = {
        "scales": scales,
        "prefix": prefix,
        "suffix": suffix,
        "append": append,
    }
    return resolved.norm_standardize(data, int(sample_id), **options)