Surveys¶

soundscapy.surveys ¶

Soundscapy Surveys Package.

This package handles the processing and analysis of soundscape surveys, including PAQ (Perceived Affective Quality) data and ISO coordinate calculations.

MODULE	DESCRIPTION
`processing`	Soundscape survey data processing module.
`survey_utils`	Core utility functions for processing soundscape survey data.

FUNCTION	DESCRIPTION
`add_iso_coords`	Calculate and add ISO coordinates as new columns in the DataFrame.
`calculate_iso_coords`	Calculate the projected ISOPleasant and ISOEventful coordinates.
`ipsatize`	Participant-level ipsatization for circumplex analysis.
`return_paqs`	Return only the PAQ columns from a DataFrame.
`simulation`	Generate random PAQ responses for simulation purposes.
`rename_paqs`	Rename the PAQ columns in a DataFrame to standard PAQ IDs.

add_iso_coords ¶

add_iso_coords(
    data: DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame

Calculate and add ISO coordinates as new columns in the DataFrame.

PARAMETER	DESCRIPTION
`data`	Input DataFrame containing PAQ data TYPE: `DataFrame`
`val_range`	(min, max) range of original PAQ responses, by default (1, 5) TYPE: `tuple[int, int]` DEFAULT: `(1, 5)`
`names`	Names for new coordinate columns, by default ("ISOPleasant", "ISOEventful") TYPE: `tuple[str, str]` DEFAULT: `('ISOPleasant', 'ISOEventful')`
`angles`	Angles for each PAQ in degrees, by default EQUAL_ANGLES TYPE: `tuple[int, ...]` DEFAULT: `EQUAL_ANGLES`
`overwrite`	Whether to overwrite existing ISO coordinate columns, by default False TYPE: `bool` DEFAULT: `False`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with new ISO coordinate columns added

RAISES	DESCRIPTION
`Warning`	If ISO coordinate columns already exist and overwrite is False

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> df_with_iso = add_iso_coords(df)
>>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
   ISOPleasant  ISOEventful
0        -0.03        -0.28
1         0.47         0.18

Source code in src/soundscapy/surveys/processing.py

def add_iso_coords(
    data: pd.DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame:
    """
    Append the projected ISO coordinates to a PAQ DataFrame.

    The coordinates are obtained from :func:`calculate_iso_coords` and attached
    under the two column names given in ``names``. The result comes from
    ``DataFrame.drop`` / ``DataFrame.assign``, so a new DataFrame is returned.

    Parameters
    ----------
    data
        Input DataFrame containing PAQ data
    val_range
        Range of the original PAQ responses, forwarded unchanged to
        ``calculate_iso_coords``; by default (1, 5)
    names
        Column names for the two coordinates, first the pleasantness column and
        then the eventfulness column; by default ("ISOPleasant", "ISOEventful")
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES
    overwrite
        When True, any existing column listed in ``names`` is dropped and
        recomputed; when False (default) an existing column is an error

    Returns
    -------
    :
        DataFrame with the two ISO coordinate columns added

    Raises
    ------
    Warning
        Raised (as an exception) for the first entry of ``names`` that is
        already a column of ``data`` while ``overwrite`` is False

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> df_with_iso = add_iso_coords(df)
    >>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
       ISOPleasant  ISOEventful
    0        -0.03        -0.28
    1         0.47         0.18

    """
    for column in names:
        if column not in data.columns:
            continue
        if not overwrite:
            msg = (
                f"{column} already in dataframe. Use `overwrite=True` to replace it."
            )
            raise Warning(msg)
        # Remove the stale column; the fresh values are assigned below.
        data = data.drop(column, axis=1)

    pleasant, eventful = calculate_iso_coords(
        data, val_range=val_range, angles=angles
    )
    new_columns = {names[0]: pleasant, names[1]: eventful}
    data = data.assign(**new_columns)

    logger.info(f"Added ISO coordinates to DataFrame with column names: {names}")
    return data

calculate_iso_coords ¶

calculate_iso_coords(
    results_df: DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]

Calculate the projected ISOPleasant and ISOEventful coordinates.

PARAMETER	DESCRIPTION
`results_df`	DataFrame containing PAQ data. TYPE: `DataFrame`
`val_range`	(max, min) range of original PAQ responses, by default (5, 1) TYPE: `tuple[int, int]` DEFAULT: `(5, 1)`
`angles`	Angles for each PAQ in degrees, by default EQUAL_ANGLES TYPE: `tuple[int, ...]` DEFAULT: `EQUAL_ANGLES`

RETURNS	DESCRIPTION
`tuple[Series, Series]`	ISOPleasant and ISOEventful coordinate values

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
>>> iso_pleasant.round(2)
0   -0.03
1    0.47
dtype: float64
>>> iso_eventful.round(2)
0   -0.28
1    0.18
dtype: float64

Source code in src/soundscapy/surveys/processing.py

def calculate_iso_coords(
    results_df: pd.DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]:
    """
    Project PAQ responses onto the ISOPleasant and ISOEventful axes.

    Only the PAQ columns of ``results_df`` are used (they are selected with
    ``return_paqs(..., incl_ids=False)``); each row is then projected
    independently.

    Parameters
    ----------
    results_df
        DataFrame containing PAQ data.
    val_range
        The two extremes of the original PAQ response scale, by default (5, 1).
        Only ``max(val_range) - min(val_range)`` is used, so the order of the
        two values does not matter.
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES

    Returns
    -------
    :
        ISOPleasant and ISOEventful coordinate values, in that order

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
    >>> iso_pleasant.round(2)
    0   -0.03
    1    0.47
    dtype: float64
    >>> iso_eventful.round(2)
    0   -0.28
    1    0.18
    dtype: float64

    """
    # Width of the response scale, independent of the order of val_range.
    span = max(val_range) - min(val_range)
    paq_only = return_paqs(results_df, incl_ids=False)

    def _pleasant(row):
        return _adj_iso_pl(row, angles, span)

    def _eventful(row):
        return _adj_iso_ev(row, angles, span)

    iso_pleasant = paq_only.apply(_pleasant, axis=1)
    iso_eventful = paq_only.apply(_eventful, axis=1)

    logger.info(f"Calculated ISO coordinates for {len(results_df)} samples")
    return iso_pleasant, iso_eventful

ipsatize ¶

ipsatize(
    data: DataFrame,
    method: Literal[
        "grand_mean", "column_wise", "row_wise"
    ] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame

Participant-level ipsatization for circumplex analysis.

Removes systematic response biases before computing a correlation matrix. The choice of method depends on the study design and the type of bias being corrected.

PARAMETER	DESCRIPTION
`data`	DataFrame containing PAQ scale columns and (for participant-level methods) a grouping column. TYPE: `DataFrame`
`method`	Centering strategy: `"grand_mean"` (default) — one scalar per participant: the mean across all PAQ values and all observations for that participant. Removes overall response-level differences between participants. Matches the published SATP analysis (Aletta et al., 2024) and the original R implementation. `"column_wise"` — eight scalars per participant: the per-scale mean across that participant's observations. Removes scale-specific response biases. This is the behaviour of the legacy :func:`person_center` function. `"row_wise"` — one scalar per observation: the mean across all PAQ scales within that observation. Removes the general impression of each individual soundscape stimulus. Equivalent to `circumplex.ipsatize()`. TYPE: `Literal['grand_mean', 'column_wise', 'row_wise']` DEFAULT: `'grand_mean'`
`participant_col`	Column used to group observations by participant. Required for `"grand_mean"` and `"column_wise"`; ignored for `"row_wise"`. TYPE: `str` DEFAULT: `'participant'`
`scales`	PAQ column names to centre. Defaults to :data:`PAQ_IDS` when `None`. TYPE: `list[str] \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame containing only the scale columns with centred values. The `participant_col` grouping column is excluded from the result.

RAISES	DESCRIPTION
`KeyError`	If `participant_col` is not present in `data` when `method` is `"grand_mean"` or `"column_wise"`.

Examples:

>>> import pandas as pd
>>> data = pd.DataFrame({
...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
...     'participant': ['A', 'A', 'B', 'B'],
... })
>>> result = ipsatize(data, method="grand_mean")
>>> result['PAQ1'].tolist()
[-5.0, 5.0, 5.0, -5.0]

Source code in src/soundscapy/surveys/processing.py

def ipsatize(
    data: pd.DataFrame,
    method: Literal["grand_mean", "column_wise", "row_wise"] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame:
    """
    Participant-level ipsatization for circumplex analysis.

    Removes systematic response biases before computing a correlation matrix.
    The choice of method depends on the study design and the type of bias
    being corrected.

    Parameters
    ----------
    data
        DataFrame containing PAQ scale columns and (for participant-level
        methods) a grouping column.
    method
        Centering strategy:

        ``"grand_mean"`` *(default)* — one scalar per participant: the mean
        across *all* PAQ values and *all* observations for that participant.
        Removes overall response-level differences between participants.
        **Matches the published SATP analysis (Aletta et al., 2024) and the
        original R implementation.**

        ``"column_wise"`` — eight scalars per participant: the per-scale mean
        across that participant's observations.  Removes scale-specific
        response biases.  This is the behaviour of the legacy
        :func:`person_center` function.

        ``"row_wise"`` — one scalar per observation: the mean across all PAQ
        scales within that observation.  Removes the general impression of
        each individual soundscape stimulus.  Equivalent to
        ``circumplex.ipsatize()``.
    participant_col
        Column used to group observations by participant.  Required for
        ``"grand_mean"`` and ``"column_wise"``; ignored for ``"row_wise"``.
    scales
        PAQ column names to centre.  Defaults to :data:`PAQ_IDS` when
        ``None``.

    Returns
    -------
    :
        DataFrame containing only the scale columns with centred values.
        The ``participant_col`` grouping column is excluded from the result.

    Raises
    ------
    KeyError
        If ``participant_col`` is not present in ``data`` when
        ``method`` is ``"grand_mean"`` or ``"column_wise"``.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({
    ...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
    ...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
    ...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
    ...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
    ...     'participant': ['A', 'A', 'B', 'B'],
    ... })
    >>> result = ipsatize(data, method="grand_mean")
    >>> result['PAQ1'].tolist()
    [-5.0, 5.0, 5.0, -5.0]

    """
    _scales = scales if scales is not None else PAQ_IDS

    if method == "column_wise":
        means = data.groupby(participant_col)[_scales].transform("mean")
        return data[_scales] - means

    if method == "grand_mean":
        # Compute a single scalar per participant: mean across all PAQ values
        # and all observations for that participant.  Use nanmean so that
        # participants with partial NaN data still get a valid grand mean
        # computed from their non-NaN values; NaN rows are then removed by
        # downstream listwise deletion rather than silently expanding data loss
        # to the whole participant.
        grand_means = data.groupby(participant_col)[_scales].apply(
            lambda df: float(np.nanmean(df.values))
        )
        grand_mean_per_row = data[participant_col].map(grand_means)
        return data[_scales].subtract(grand_mean_per_row, axis=0)

    if method == "row_wise":
        row_means = data[_scales].mean(axis=1)
        return data[_scales].sub(row_means, axis=0)

    msg = f"method must be 'grand_mean', 'column_wise', or 'row_wise'; got {method!r}"
    raise ValueError(msg)

return_paqs ¶

return_paqs(
    df: DataFrame,
    other_cols: list[str] | None = None,
    *,
    incl_ids: bool = True,
) -> pd.DataFrame

Return only the PAQ columns from a DataFrame.

PARAMETER	DESCRIPTION
`df`	Input DataFrame containing PAQ data. TYPE: `DataFrame`
`other_cols`	Other columns to include in the output, by default None. TYPE: `list[str] \| None` DEFAULT: `None`
`incl_ids`	Whether to include ID columns (RecordID, GroupID, etc.), by default True. TYPE: `bool` DEFAULT: `True`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame containing only the PAQ columns and optionally ID and other specified columns.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'RecordID': [1, 2],
...     'PAQ1': [4, 3],
...     'PAQ2': [2, 5],
...     'PAQ3': [1, 2],
...     'PAQ4': [3, 4],
...     'PAQ5': [5, 1],
...     'PAQ6': [2, 3],
...     'PAQ7': [4, 5],
...     'PAQ8': [1, 2],
...     'OtherCol': ['A', 'B']
... })
>>> return_paqs(df)
   RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
0         1     4     2     1     3     5     2     4     1
1         2     3     5     2     4     1     3     5     2
>>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
   PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
0     4     2     1     3     5     2     4     1        A
1     3     5     2     4     1     3     5     2        B

Source code in src/soundscapy/surveys/survey_utils.py

def return_paqs(
    df: pd.DataFrame, other_cols: list[str] | None = None, *, incl_ids: bool = True
) -> pd.DataFrame:
    """
    Select the PAQ columns of a DataFrame, optionally with ID and extra columns.

    The output columns are ordered as: ID columns (if requested and present),
    then the PAQ columns from ``PAQ_IDS``, then ``other_cols``.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    other_cols
        Additional column names to append after the PAQ columns, by default None.
    incl_ids
        Whether to prepend whichever of the ID columns RecordID, GroupID,
        SessionID and LocationID exist in ``df``, by default True.

    Returns
    -------
    :
        DataFrame restricted to the selected columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'RecordID': [1, 2],
    ...     'PAQ1': [4, 3],
    ...     'PAQ2': [2, 5],
    ...     'PAQ3': [1, 2],
    ...     'PAQ4': [3, 4],
    ...     'PAQ5': [5, 1],
    ...     'PAQ6': [2, 3],
    ...     'PAQ7': [4, 5],
    ...     'PAQ8': [1, 2],
    ...     'OtherCol': ['A', 'B']
    ... })
    >>> return_paqs(df)
       RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
    0         1     4     2     1     3     5     2     4     1
    1         2     3     5     2     4     1     3     5     2
    >>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
       PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
    0     4     2     1     3     5     2     4     1        A
    1     3     5     2     4     1     3     5     2        B

    """
    id_columns = []
    if incl_ids:
        # ID columns are optional: keep only the ones this frame actually has.
        for candidate in ["RecordID", "GroupID", "SessionID", "LocationID"]:
            if candidate in df.columns:
                id_columns.append(candidate)

    # Work on a copy so the module-level PAQ_IDS list is never mutated.
    selected = id_columns + PAQ_IDS.copy()

    if other_cols:
        selected.extend(other_cols)

    logger.debug(f"Returning PAQ columns: {selected}")
    return df[selected]

simulation ¶

simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame

Generate random PAQ responses for simulation purposes.

PARAMETER	DESCRIPTION
`n`	Number of samples to simulate, by default 3000 TYPE: `int` DEFAULT: `3000`
`val_range`	Range of values for PAQ responses, by default (1, 5) TYPE: `tuple[int, int]` DEFAULT: `(1, 5)`
`seed`	Optional random seed for deterministic output, by default None TYPE: `int \| None` DEFAULT: `None`
`incl_iso_coords`	Whether to add calculated ISO coordinates, by default False TYPE: `bool` DEFAULT: `False`
`**coord_kwargs`	Optional keyword arguments passed directly to the `add_iso_coords` function if `incl_iso_coords` is True. These can include: `names` (tuple[str, str]): Names for the new ISO coordinate columns. `angles` (tuple[int, ...]): Angles for each PAQ used in calculation. `overwrite` (bool): Whether to overwrite existing ISO coordinate columns. TYPE: `Unpack[_AddISOCoordsKwargs]` DEFAULT: `{}`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame of randomly generated PAQ responses

Examples:

>>> data = simulation(n=5,incl_iso_coords=True)
>>> data.shape
(5, 10)
>>> list(data.columns)
['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']

Source code in src/soundscapy/surveys/processing.py

def simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame:
    """
    Draw random integer PAQ responses for simulation purposes.

    Produces ``n`` rows with one column per entry of ``PAQ_IDS``; every cell is
    an integer drawn between the smaller and the larger value of ``val_range``
    (both included).

    Parameters
    ----------
    n
        Number of samples (rows) to simulate, by default 3000
    val_range
        Range of values for PAQ responses, by default (1, 5)
    seed
        Seed handed to ``numpy.random.default_rng`` for reproducible output,
        by default None
    incl_iso_coords
        Whether to append ISO coordinates via `add_iso_coords`, by default False
    **coord_kwargs
        Extra keyword arguments forwarded to `add_iso_coords`; only used
        if `incl_iso_coords` is True. These can include:

        - `names` (tuple[str, str]): Names for the new ISO coordinate columns.
        - `angles` (tuple[int, ...]): Angles for each PAQ used in calculation.
        - `overwrite` (bool): Whether to overwrite existing ISO coordinate columns.

    Returns
    -------
    :
        DataFrame of randomly generated PAQ responses

    Examples
    --------
    >>> data = simulation(n=5,incl_iso_coords=True)
    >>> data.shape
    (5, 10)
    >>> list(data.columns)
    ['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']

    """  # noqa: E501
    lowest, highest = min(val_range), max(val_range)
    rng = np.random.default_rng(seed)
    # Generator.integers excludes the upper bound, hence the +1.
    responses = rng.integers(lowest, highest + 1, size=(n, 8))
    data = pd.DataFrame(responses, columns=PAQ_IDS)

    if incl_iso_coords:
        data = add_iso_coords(data, val_range=val_range, **coord_kwargs)

    logger.info(f"Generated simulated PAQ data with {n} samples")
    return data

rename_paqs ¶

rename_paqs(
    df: DataFrame,
    paq_aliases: list | tuple | dict | None = None,
) -> pd.DataFrame

Rename the PAQ columns in a DataFrame to standard PAQ IDs.

PARAMETER	DESCRIPTION
`df`	Input DataFrame containing PAQ data. TYPE: `DataFrame`
`paq_aliases`	Specify which PAQs are to be renamed. If None, will check if the column names are in pre-defined options. If a list or tuple, the order must match PAQ_IDS. If a dict, keys are current names and values are desired PAQ IDs. TYPE: `list \| tuple \| dict \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with renamed PAQ columns.

RAISES	DESCRIPTION
`TypeError`	If paq_aliases is not a tuple, list, or dictionary.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'pleasant': [4, 3],
...     'vibrant': [2, 5],
...     'other_col': [1, 2]
... })
>>> rename_paqs(df)
   PAQ1  PAQ2  other_col
0     4     2          1
1     3     5          2
>>> df_custom = pd.DataFrame({
...     'pl': [4, 3],
...     'vb': [2, 5],
... })
>>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
   PAQ1  PAQ2
0     4     2
1     3     5

Source code in src/soundscapy/surveys/survey_utils.py

def rename_paqs(
    df: pd.DataFrame, paq_aliases: list | tuple | dict | None = None
) -> pd.DataFrame:
    """
    Rename PAQ columns of a DataFrame to the standard PAQ IDs.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    paq_aliases
        Describes the current names of the PAQ columns.

        - ``None``: if any standard PAQ ID is already a column, ``df`` is
          returned unchanged; otherwise, if any of the pre-defined
          ``PAQ_LABELS`` is a column, those labels are used as aliases.
        - list or tuple: current names, in the same order as ``PAQ_IDS``.
        - dict: keys are current names and values are the desired PAQ IDs.

    Returns
    -------
    :
        DataFrame with renamed PAQ columns.

    Raises
    ------
    TypeError
        If paq_aliases is not a tuple, list, or dictionary. This also applies
        when it is ``None`` and neither PAQ IDs nor pre-defined labels are
        found among the columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'pleasant': [4, 3],
    ...     'vibrant': [2, 5],
    ...     'other_col': [1, 2]
    ... })
    >>> rename_paqs(df)
       PAQ1  PAQ2  other_col
    0     4     2          1
    1     3     5          2
    >>> df_custom = pd.DataFrame({
    ...     'pl': [4, 3],
    ...     'vb': [2, 5],
    ... })
    >>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
       PAQ1  PAQ2
    0     4     2
    1     3     5

    """
    if paq_aliases is None:
        # Nothing to do when at least one standard ID is already present.
        for paq_id in PAQ_IDS:
            if paq_id in df.columns:
                logger.info("PAQs already correctly named.")
                return df
        # Otherwise fall back to the pre-defined labels if any of them appear.
        for label in PAQ_LABELS:
            if label in df.columns:
                paq_aliases = PAQ_LABELS
                break

    if isinstance(paq_aliases, dict):
        mapping = paq_aliases
    elif isinstance(paq_aliases, (list, tuple)):
        # Positional aliases: pair each alias with the PAQ ID at the same index.
        mapping = dict(zip(paq_aliases, PAQ_IDS, strict=False))
    else:
        msg = "paq_aliases must be a tuple, list, or dictionary."
        raise TypeError(msg)

    logger.debug(f"Renaming PAQs with the following mapping: {mapping}")
    return df.rename(columns=mapping)

Processing¶

soundscapy.surveys.processing ¶

Soundscape survey data processing module.

This module contains functions for processing and analyzing soundscape survey data, including ISO coordinate calculations, data quality checks, and SSM metrics.

Notes

The functions in this module are designed to be fairly general and can be used with any dataset in a similar format to the ISD. The key to this is using a simple dataframe/sheet with the following columns:

Index columns: e.g. LocationID, RecordID, GroupID, SessionID
Perceptual attributes: PAQ1, PAQ2, ..., PAQ8
Independent variables: e.g. Laeq, N5, Sharpness, etc.

The key functions of this module are designed to clean/validate datasets, calculate ISO coordinate values or SSM metrics, and filter on index columns. Functions and operations which are specific to a particular dataset are located in their own modules under soundscapy.databases.

CLASS	DESCRIPTION
`ISOCoordinates`	Dataclass for storing ISO coordinates.
`SSMMetrics`	Dataclass for storing Structural Summary Method (SSM) metrics.

FUNCTION	DESCRIPTION
`calculate_iso_coords`	Calculate the projected ISOPleasant and ISOEventful coordinates.
`add_iso_coords`	Calculate and add ISO coordinates as new columns in the DataFrame.
`likert_data_quality`	Perform basic quality checks on PAQ (Likert scale) data.
`simulation`	Generate random PAQ responses for simulation purposes.
`ssm_metrics`	Calculate the Structural Summary Method (SSM) metrics for each response.
`ssm_cosine_fit`	Fit a cosine model to the PAQ data for SSM analysis.
`ipsatize`	Participant-level ipsatization for circumplex analysis.

ISOCoordinates `dataclass` ¶

ISOCoordinates(pleasant: float, eventful: float)

Dataclass for storing ISO coordinates.

SSMMetrics `dataclass` ¶

SSMMetrics(
    amplitude: float,
    angle: float,
    elevation: float,
    displacement: float,
    r_squared: float,
)

Dataclass for storing Structural Summary Method (SSM) metrics.

METHOD	DESCRIPTION
`table`	Generate a pandas Series containing specific attributes of the instance.

table ¶

table() -> pd.Series

Generate a pandas Series containing specific attributes of the instance.

This method collects the values of the instance attributes related to amplitude, angle, elevation, displacement, and r_squared, and organizes them into a pandas Series. It is useful for presenting the data in a structured format suitable for further processing or analysis.

RETURNS DESCRIPTION

Series

A pandas Series containing the following key-value pairs:

"amplitude": instance attribute representing a certain magnitude.
"angle": instance attribute representing a specific angular measurement.
"elevation": instance attribute indicating a height or vertical position.
"displacement": instance attribute defining the movement or shift.
"r_squared": instance attribute denoting coefficient of determination.

Source code in src/soundscapy/surveys/processing.py

def table(self) -> pd.Series:
    """
    Return the SSM metrics of this instance as a labelled pandas Series.

    The Series is indexed by attribute name and holds the current value of
    each attribute, which makes it convenient to print or to combine with
    other results.

    Returns
    -------
    :
        A pandas Series with the entries, in this order:

        - "amplitude": value of ``self.amplitude``.
        - "angle": value of ``self.angle``.
        - "elevation": value of ``self.elevation``.
        - "displacement": value of ``self.displacement``.
        - "r_squared": value of ``self.r_squared``.

    """
    metric_names = ("amplitude", "angle", "elevation", "displacement", "r_squared")
    values = {metric: getattr(self, metric) for metric in metric_names}
    return pd.Series(values)

calculate_iso_coords ¶

calculate_iso_coords(
    results_df: DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]

Calculate the projected ISOPleasant and ISOEventful coordinates.

PARAMETER	DESCRIPTION
`results_df`	DataFrame containing PAQ data. TYPE: `DataFrame`
`val_range`	(max, min) range of original PAQ responses, by default (5, 1) TYPE: `tuple[int, int]` DEFAULT: `(5, 1)`
`angles`	Angles for each PAQ in degrees, by default EQUAL_ANGLES TYPE: `tuple[int, ...]` DEFAULT: `EQUAL_ANGLES`

RETURNS	DESCRIPTION
`tuple[Series, Series]`	ISOPleasant and ISOEventful coordinate values

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
>>> iso_pleasant.round(2)
0   -0.03
1    0.47
dtype: float64
>>> iso_eventful.round(2)
0   -0.28
1    0.18
dtype: float64

Source code in src/soundscapy/surveys/processing.py

def calculate_iso_coords(
    results_df: pd.DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]:
    """
    Project PAQ responses onto the ISOPleasant and ISOEventful axes.

    Only the PAQ columns of ``results_df`` are used (they are selected with
    ``return_paqs(..., incl_ids=False)``); each row is then projected
    independently.

    Parameters
    ----------
    results_df
        DataFrame containing PAQ data.
    val_range
        The two extremes of the original PAQ response scale, by default (5, 1).
        Only ``max(val_range) - min(val_range)`` is used, so the order of the
        two values does not matter.
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES

    Returns
    -------
    :
        ISOPleasant and ISOEventful coordinate values, in that order

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
    >>> iso_pleasant.round(2)
    0   -0.03
    1    0.47
    dtype: float64
    >>> iso_eventful.round(2)
    0   -0.28
    1    0.18
    dtype: float64

    """
    # Width of the response scale, independent of the order of val_range.
    span = max(val_range) - min(val_range)
    paq_only = return_paqs(results_df, incl_ids=False)

    def _pleasant(row):
        return _adj_iso_pl(row, angles, span)

    def _eventful(row):
        return _adj_iso_ev(row, angles, span)

    iso_pleasant = paq_only.apply(_pleasant, axis=1)
    iso_eventful = paq_only.apply(_eventful, axis=1)

    logger.info(f"Calculated ISO coordinates for {len(results_df)} samples")
    return iso_pleasant, iso_eventful

add_iso_coords ¶

add_iso_coords(
    data: DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame

Calculate and add ISO coordinates as new columns in the DataFrame.

PARAMETER	DESCRIPTION
`data`	Input DataFrame containing PAQ data TYPE: `DataFrame`
`val_range`	(min, max) range of original PAQ responses, by default (1, 5) TYPE: `tuple[int, int]` DEFAULT: `(1, 5)`
`names`	Names for new coordinate columns, by default ("ISOPleasant", "ISOEventful") TYPE: `tuple[str, str]` DEFAULT: `('ISOPleasant', 'ISOEventful')`
`angles`	Angles for each PAQ in degrees, by default EQUAL_ANGLES TYPE: `tuple[int, ...]` DEFAULT: `EQUAL_ANGLES`
`overwrite`	Whether to overwrite existing ISO coordinate columns, by default False TYPE: `bool` DEFAULT: `False`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with new ISO coordinate columns added

RAISES	DESCRIPTION
`Warning`	If ISO coordinate columns already exist and overwrite is False

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> df_with_iso = add_iso_coords(df)
>>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
   ISOPleasant  ISOEventful
0        -0.03        -0.28
1         0.47         0.18

Source code in src/soundscapy/surveys/processing.py

def add_iso_coords(
    data: pd.DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame:
    """
    Append the projected ISO coordinates to a PAQ DataFrame.

    The coordinates are computed by `calculate_iso_coords` and attached under
    the two column names given in `names`. The input frame is not modified in
    place; the returned DataFrame is a new object.

    Parameters
    ----------
    data
        Input DataFrame containing PAQ data
    val_range
        (min, max) range of the original PAQ responses, by default (1, 5)
    names
        Column names for the pleasant and eventful coordinates (in that order),
        by default ("ISOPleasant", "ISOEventful")
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES
    overwrite
        Whether existing columns named in `names` are dropped and recomputed,
        by default False

    Returns
    -------
    :
        DataFrame with the two ISO coordinate columns added

    Raises
    ------
    Warning
        If a column named in `names` already exists and `overwrite` is False

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> df_with_iso = add_iso_coords(df)
    >>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
       ISOPleasant  ISOEventful
    0        -0.03        -0.28
    1         0.47         0.18

    """
    for column in names:
        if column not in data.columns:
            continue
        if not overwrite:
            msg = (
                f"{column} already in dataframe. Use `overwrite=True` to replace it."
            )
            raise Warning(msg)
        # Remove the stale column so it cannot influence the recalculation.
        data = data.drop(columns=column)

    pleasant, eventful = calculate_iso_coords(
        data, val_range=val_range, angles=angles
    )
    new_columns = {names[0]: pleasant, names[1]: eventful}
    data = data.assign(**new_columns)

    logger.info(f"Added ISO coordinates to DataFrame with column names: {names}")
    return data

likert_data_quality ¶

likert_data_quality(
    df: DataFrame,
    val_range: tuple[int, int] = (1, 5),
    *,
    allow_na: bool = False,
) -> list[int] | None

Perform basic quality checks on PAQ (Likert scale) data.

PARAMETER	DESCRIPTION
`df`	DataFrame containing PAQ data TYPE: `DataFrame`
`allow_na`	Whether to allow NaN values in PAQ data, by default False TYPE: `bool` DEFAULT: `False`
`val_range`	Valid range for PAQ values, by default (1, 5) TYPE: `tuple[int, int]` DEFAULT: `(1, 5)`

RETURNS	DESCRIPTION
`list[int] \| None`	Index labels of the rows that failed the quality checks, or `None` if no row failed.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> df = pd.DataFrame({
...     'PAQ1': [np.nan, 2, 3, 3], 'PAQ2': [3, 2, 6, 3], 'PAQ3': [2, 2, 3, 3],
...     'PAQ4': [1, 2, 3, 3], 'PAQ5': [5, 2, 3, 3], 'PAQ6': [3, 2, 3, 3],
...     'PAQ7': [4, 2, 3, 3], 'PAQ8': [2, 2, 3, 3]
... })
>>> likert_data_quality(df)
[0, 1, 2]
>>> likert_data_quality(df,allow_na=True)
[1, 2]

Source code in src/soundscapy/surveys/processing.py

def likert_data_quality(
    df: pd.DataFrame, val_range: tuple[int, int] = (1, 5), *, allow_na: bool = False
) -> list[int] | None:
    """
    Flag PAQ (Likert scale) rows that fail basic quality checks.

    A row is flagged when any of the following holds:

    - it contains a NaN value and `allow_na` is False;
    - it has no NaN values and a value lies outside `val_range`;
    - it has no NaN values, every value is identical, and that value is not
      the midpoint (mean) of `val_range`.

    Rows that contain NaN values are not range- or constancy-checked, so with
    `allow_na=True` such rows are never flagged.

    Parameters
    ----------
    df
        DataFrame containing PAQ data
    allow_na
        Whether to allow NaN values in PAQ data, by default False
    val_range
        Valid range for PAQ values, by default (1, 5)

    Returns
    -------
    :
        Index labels of the flagged rows (string labels are converted with
        ``int``), or None when no row is flagged.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> df = pd.DataFrame({
    ...     'PAQ1': [np.nan, 2, 3, 3], 'PAQ2': [3, 2, 6, 3], 'PAQ3': [2, 2, 3, 3],
    ...     'PAQ4': [1, 2, 3, 3], 'PAQ5': [5, 2, 3, 3], 'PAQ6': [3, 2, 3, 3],
    ...     'PAQ7': [4, 2, 3, 3], 'PAQ8': [2, 2, 3, 3]
    ... })
    >>> likert_data_quality(df)
    [0, 1, 2]
    >>> likert_data_quality(df,allow_na=True)
    [1, 2]

    """
    flagged = []

    for label, responses in return_paqs(df, incl_ids=False).iterrows():
        # String index labels are coerced to int for the returned list.
        key = int(label) if isinstance(label, str) else label
        values = responses.to_numpy()
        all_same = values.shape[0] > 0 and (values[0] == values).all()

        if responses.isna().any():
            # Incomplete rows are judged only on whether NaN is permitted.
            failed = not allow_na
        else:
            failed = (
                responses.min() < min(val_range)
                or responses.max() > max(val_range)
                or (all_same and responses.iloc[0] != np.mean(val_range))
            )

        if failed:
            flagged.append(key)

    if not flagged:
        logger.info("PAQ data quality check passed")
        return None

    logger.info(f"Found {len(flagged)} samples with data quality issues")
    return flagged

simulation ¶

simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame

Generate random PAQ responses for simulation purposes.

PARAMETER	DESCRIPTION
`n`	Number of samples to simulate, by default 3000 TYPE: `int` DEFAULT: `3000`
`val_range`	Range of values for PAQ responses, by default (1, 5) TYPE: `tuple[int, int]` DEFAULT: `(1, 5)`
`seed`	Optional random seed for deterministic output, by default None TYPE: `int \| None` DEFAULT: `None`
`incl_iso_coords`	Whether to add calculated ISO coordinates, by default False TYPE: `bool` DEFAULT: `False`
`**coord_kwargs`	Optional keyword arguments passed directly to the `add_iso_coords` function if `incl_iso_coords` is True. These can include: `names` (tuple[str, str]): Names for the new ISO coordinate columns. `angles` (tuple[int, ...]): Angles for each PAQ used in calculation. `overwrite` (bool): Whether to overwrite existing ISO coordinate columns. TYPE: `Unpack[_AddISOCoordsKwargs]` DEFAULT: `{}`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame of randomly generated PAQ responses

Examples:

>>> data = simulation(n=5,incl_iso_coords=True)
>>> data.shape
(5, 10)
>>> list(data.columns)
['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']

Source code in src/soundscapy/surveys/processing.py

def simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame:
    """
    Draw random integer PAQ responses for simulation purposes.

    Each of the eight PAQ columns is filled with integers drawn from the
    inclusive range spanned by `val_range`.

    Parameters
    ----------
    n
        Number of samples to simulate, by default 3000
    val_range
        Range of values for PAQ responses, by default (1, 5)
    seed
        Optional random seed for deterministic output, by default None
    incl_iso_coords
        Whether to add calculated ISO coordinates, by default False
    **coord_kwargs
        Optional keyword arguments passed directly to the `add_iso_coords` function
        if `incl_iso_coords` is True. These can include:

        - `names` (tuple[str, str]): Names for the new ISO coordinate columns.
        - `angles` (tuple[int, ...]): Angles for each PAQ used in calculation.
        - `overwrite` (bool): Whether to overwrite existing ISO coordinate columns.

    Returns
    -------
    :
        DataFrame of randomly generated PAQ responses

    Examples
    --------
    >>> data = simulation(n=5,incl_iso_coords=True)
    >>> data.shape
    (5, 10)
    >>> list(data.columns)
    ['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']

    """  # noqa: E501
    rng = np.random.default_rng(seed)
    # `integers` excludes the upper bound, hence the +1 to keep it inclusive.
    responses = rng.integers(min(val_range), max(val_range) + 1, size=(n, 8))
    data = pd.DataFrame(responses, columns=PAQ_IDS)

    if incl_iso_coords:
        data = add_iso_coords(data, val_range=val_range, **coord_kwargs)

    logger.info(f"Generated simulated PAQ data with {n} samples")
    return data

ssm_metrics ¶

ssm_metrics(
    df: DataFrame,
    paq_cols: list[str] = PAQ_IDS,
    method: str = "cosine",
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> pd.DataFrame

Calculate the Structural Summary Method (SSM) metrics for each response.

PARAMETER	DESCRIPTION
`df`	DataFrame containing PAQ data TYPE: `DataFrame`
`paq_cols`	List of PAQ column names, by default PAQ_IDS TYPE: `list[str]` DEFAULT: `PAQ_IDS`
`method`	Method to calculate SSM metrics, either "cosine" or "polar", by default "cosine" TYPE: `str` DEFAULT: `'cosine'`
`val_range`	Range of values for PAQ responses, by default (5, 1) TYPE: `tuple[int, int]` DEFAULT: `(5, 1)`
`angles`	Angles for each PAQ in degrees, by default EQUAL_ANGLES TYPE: `tuple[int, ...]` DEFAULT: `EQUAL_ANGLES`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame containing the SSM metrics

RAISES	DESCRIPTION
`ValueError`	If PAQ columns are not present in the DataFrame or if an invalid method is specified

Examples:

>>>
>>> import pandas as pd
>>> data = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> ssm_metrics(data).round(2)
   amplitude   angle  elevation  displacement  r_squared
0       0.68  263.82      10.57         -7.57       0.15
1       1.21   20.63       0.01          3.11       0.39

Source code in src/soundscapy/surveys/processing.py

def ssm_metrics(
    df: pd.DataFrame,
    paq_cols: list[str] = PAQ_IDS,
    method: str = "cosine",
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> pd.DataFrame:
    """
    Calculate the Structural Summary Method (SSM) metrics for each response.

    Two strategies are available:

    - ``"cosine"`` fits a cosine model to every row with `ssm_cosine_fit`.
    - ``"polar"`` converts the ISO coordinates of every row to polar form;
      elevation is the row mean of the PAQ columns (divided by the width of
      `val_range`), while displacement and r_squared are fixed at 0 and 1.

    A ``PendingDeprecationWarning`` is always emitted because this
    implementation is incomplete.

    Parameters
    ----------
    df
        DataFrame containing PAQ data
    paq_cols
        List of PAQ column names, by default PAQ_IDS
    method
        Method to calculate SSM metrics, either "cosine" or "polar", by default "cosine"
    val_range
        Range of values for PAQ responses, by default (5, 1). Only the minimum
        and maximum are used, so the order of the two values does not matter.
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES

    Returns
    -------
    :
        DataFrame containing the SSM metrics

    Raises
    ------
    ValueError
        If PAQ columns are not present in the DataFrame
        or if an invalid method is specified

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> ssm_metrics(data).round(2)  # doctest: +SKIP
       amplitude   angle  elevation  displacement  r_squared
    0       0.68  263.82      10.57         -7.57       0.15
    1       1.21   20.63       0.01          3.11       0.39

    """
    # TODO(MitchellAcoustics): Replace with a call to circumplex package
    warnings.warn(
        "This function is not yet fully implemented. "
        "See https://github.com/MitchellAcoustics/circumplex for a "
        "more complete implementation.",
        PendingDeprecationWarning,
        stacklevel=2,
    )

    if not set(paq_cols).issubset(df.columns):
        msg = f"PAQ columns {paq_cols} not present in DataFrame"
        raise ValueError(msg)

    if method == "polar":
        iso_pleasant, iso_eventful = calculate_iso_coords(
            df[paq_cols], val_range, angles
        )
        r, theta = _convert_to_polar_coords(
            iso_pleasant.to_numpy(), iso_eventful.to_numpy()
        )
        mean = df[paq_cols].mean(axis=1)
        # A (0, 1) range is left unscaled; any other range is divided by its width.
        mean = mean / (max(val_range) - min(val_range)) if val_range != (0, 1) else mean

        return pd.DataFrame(
            {
                "amplitude": r,
                "angle": theta,
                "elevation": mean,
                "displacement": 0,  # Displacement is always 0 for polar method
                "r_squared": 1,  # R-squared is always 1 for polar method
            }
        )
    if method == "cosine":
        return df[paq_cols].apply(
            lambda y: ssm_cosine_fit(y, angles).table(),
            axis=1,
            result_type="expand",
        )
    msg = "Method must be either 'polar' or 'cosine'"
    raise ValueError(msg)

ssm_cosine_fit ¶

ssm_cosine_fit(
    y: Series,
    angles: tuple[int, ...] | ndarray = EQUAL_ANGLES,
    bounds: tuple[list[float], list[float]] = (
        [0, 0, 0, -np.inf],
        [np.inf, 360, np.inf, np.inf],
    ),
) -> SSMMetrics

Fit a cosine model to the PAQ data for SSM analysis.

PARAMETER	DESCRIPTION
`y`	Series of PAQ values TYPE: `Series`
`angles`	Angles for each PAQ in degrees, by default EQUAL_ANGLES TYPE: `tuple[int, ...] \| ndarray` DEFAULT: `EQUAL_ANGLES`
`bounds`	Bounds for the optimization parameters, by default ([0, 0, 0, -np.inf], [np.inf, 360, np.inf, np.inf]) TYPE: `tuple[list[float], list[float]]` DEFAULT: `([0, 0, 0, -inf], [inf, 360, inf, inf])`

RETURNS	DESCRIPTION
`SSMMetrics`	Calculated SSM metrics

Examples:

>>>
>>> import pandas as pd
>>> y = pd.Series([4, 3, 2, 1, 5, 3, 4, 2])
>>> metrics = ssm_cosine_fit(y)
>>> [round(v, 2) if isinstance(v, float) else v for v in metrics.table()]
[0.68, 263.82, 10.57, -7.57, 0.15]

Source code in src/soundscapy/surveys/processing.py

def ssm_cosine_fit(
    y: pd.Series,
    angles: tuple[int, ...] | np.ndarray = EQUAL_ANGLES,
    bounds: tuple[list[float], list[float]] = (
        [0, 0, 0, -np.inf],
        [np.inf, 360, np.inf, np.inf],
    ),
) -> SSMMetrics:
    """
    Fit a cosine model to the PAQ data for SSM analysis.

    The fitted model is ``elev + amp * cos(radians(theta - delta)) + dev``.
    A ``PendingDeprecationWarning`` is always emitted because this
    implementation is incomplete.

    Parameters
    ----------
    y
        Series of PAQ values
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES. Any
        array-like (tuple, list, or ndarray) is accepted.
    bounds
        Lower and upper bounds for the optimization parameters, in the order
        (amplitude, angle, elevation, displacement),
        by default ([0, 0, 0, -np.inf], [np.inf, 360, np.inf, np.inf])

    Returns
    -------
    :
        Calculated SSM metrics

    Examples
    --------
    >>> import pandas as pd
    >>> y = pd.Series([4, 3, 2, 1, 5, 3, 4, 2])
    >>> metrics = ssm_cosine_fit(y)  # doctest: +SKIP
    >>> [round(v, 2) if isinstance(v, float) else v
    ...  for v in metrics.table()]  # doctest: +SKIP
    [0.68, 263.82, 10.57, -7.57, 0.15]

    """
    warnings.warn(
        "This function is not yet fully implemented. "
        "See https://github.com/MitchellAcoustics/circumplex "
        "for a more complete implementation.",
        PendingDeprecationWarning,
        stacklevel=2,
    )

    def _cosine_model(
        theta: np.ndarray, amp: float, delta: float, elev: float, dev: float
    ) -> np.ndarray:
        # `elev` and `dev` enter only through their sum, so the fit determines
        # that sum rather than each term individually.
        return elev + amp * np.cos(np.radians(theta - delta)) + dev

    # Convert once up front so lists work in the R-squared step as well as in
    # the fit (a plain list cannot take part in `theta - delta`).
    angle_array = np.asarray(angles)

    param, _ = optimize.curve_fit(
        _cosine_model,
        xdata=angle_array,
        ydata=y,
        bounds=bounds,
    )
    amp, delta, elev, dev = param
    r_squared = _r2_score(y.to_numpy(), _cosine_model(angle_array, *param))

    return SSMMetrics(
        amplitude=amp,
        angle=delta,
        elevation=elev,
        displacement=dev,
        r_squared=r_squared,
    )

ipsatize ¶

ipsatize(
    data: DataFrame,
    method: Literal[
        "grand_mean", "column_wise", "row_wise"
    ] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame

Participant-level ipsatization for circumplex analysis.

Removes systematic response biases before computing a correlation matrix. The choice of method depends on the study design and the type of bias being corrected.

PARAMETER	DESCRIPTION
`data`	DataFrame containing PAQ scale columns and (for participant-level methods) a grouping column. TYPE: `DataFrame`
`method`	Centering strategy: `"grand_mean"` (default) — one scalar per participant: the mean across all PAQ values and all observations for that participant. Removes overall response-level differences between participants. Matches the published SATP analysis (Aletta et al., 2024) and the original R implementation. `"column_wise"` — eight scalars per participant: the per-scale mean across that participant's observations. Removes scale-specific response biases. This is the behaviour of the legacy :func:`person_center` function. `"row_wise"` — one scalar per observation: the mean across all PAQ scales within that observation. Removes the general impression of each individual soundscape stimulus. Equivalent to `circumplex.ipsatize()`. TYPE: `Literal['grand_mean', 'column_wise', 'row_wise']` DEFAULT: `'grand_mean'`
`participant_col`	Column used to group observations by participant. Required for `"grand_mean"` and `"column_wise"`; ignored for `"row_wise"`. TYPE: `str` DEFAULT: `'participant'`
`scales`	PAQ column names to centre. Defaults to :data:`PAQ_IDS` when `None`. TYPE: `list[str] \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame containing only the scale columns with centred values. The `participant_col` grouping column is excluded from the result.

RAISES	DESCRIPTION
`KeyError`	If `participant_col` is not present in `data` when `method` is `"grand_mean"` or `"column_wise"`.

Examples:

>>> import pandas as pd
>>> data = pd.DataFrame({
...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
...     'participant': ['A', 'A', 'B', 'B'],
... })
>>> result = ipsatize(data, method="grand_mean")
>>> result['PAQ1'].tolist()
[-5.0, 5.0, 5.0, -5.0]

Source code in src/soundscapy/surveys/processing.py

def ipsatize(
    data: pd.DataFrame,
    method: Literal["grand_mean", "column_wise", "row_wise"] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame:
    """
    Participant-level ipsatization for circumplex analysis.

    Removes systematic response biases before computing a correlation matrix.
    The choice of method depends on the study design and the type of bias
    being corrected.

    Parameters
    ----------
    data
        DataFrame containing PAQ scale columns and (for participant-level
        methods) a grouping column.
    method
        Centering strategy:

        ``"grand_mean"`` *(default)* — one scalar per participant: the mean
        across *all* PAQ values and *all* observations for that participant.
        Removes overall response-level differences between participants.
        **Matches the published SATP analysis (Aletta et al., 2024) and the
        original R implementation.**

        ``"column_wise"`` — eight scalars per participant: the per-scale mean
        across that participant's observations.  Removes scale-specific
        response biases.  This is the behaviour of the legacy
        :func:`person_center` function.

        ``"row_wise"`` — one scalar per observation: the mean across all PAQ
        scales within that observation.  Removes the general impression of
        each individual soundscape stimulus.  Equivalent to
        ``circumplex.ipsatize()``.
    participant_col
        Column used to group observations by participant.  Required for
        ``"grand_mean"`` and ``"column_wise"``; ignored for ``"row_wise"``.
    scales
        PAQ column names to centre.  Defaults to :data:`PAQ_IDS` when
        ``None``.

    Returns
    -------
    :
        DataFrame containing only the scale columns with centred values.
        The ``participant_col`` grouping column is excluded from the result.

    Raises
    ------
    KeyError
        If ``participant_col`` is not present in ``data`` when
        ``method`` is ``"grand_mean"`` or ``"column_wise"``.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({
    ...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
    ...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
    ...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
    ...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
    ...     'participant': ['A', 'A', 'B', 'B'],
    ... })
    >>> result = ipsatize(data, method="grand_mean")
    >>> result['PAQ1'].tolist()
    [-5.0, 5.0, 5.0, -5.0]

    """
    _scales = scales if scales is not None else PAQ_IDS

    if method == "column_wise":
        means = data.groupby(participant_col)[_scales].transform("mean")
        return data[_scales] - means

    if method == "grand_mean":
        # Compute a single scalar per participant: mean across all PAQ values
        # and all observations for that participant.  Use nanmean so that
        # participants with partial NaN data still get a valid grand mean
        # computed from their non-NaN values; NaN rows are then removed by
        # downstream listwise deletion rather than silently expanding data loss
        # to the whole participant.
        grand_means = data.groupby(participant_col)[_scales].apply(
            lambda df: float(np.nanmean(df.values))
        )
        grand_mean_per_row = data[participant_col].map(grand_means)
        return data[_scales].subtract(grand_mean_per_row, axis=0)

    if method == "row_wise":
        row_means = data[_scales].mean(axis=1)
        return data[_scales].sub(row_means, axis=0)

    msg = f"method must be 'grand_mean', 'column_wise', or 'row_wise'; got {method!r}"
    raise ValueError(msg)

Survey utilities¶

soundscapy.surveys.survey_utils ¶

Core utility functions for processing soundscape survey data.

This module contains fundamental functions and constants used across the soundscapy package for handling and analyzing soundscape survey data.

CLASS	DESCRIPTION
`PAQ`	Enumeration of Perceptual Attribute Questions (PAQ) names and IDs.
`PAQDfSchema`	Pandera schema for validating PAQ (Perceptual Attribute Questions) DataFrames.
`LikertScale`	Contains different Likert scale options for survey questions.

FUNCTION	DESCRIPTION
`return_paqs`	Return only the PAQ columns from a DataFrame.
`rename_paqs`	Rename the PAQ columns in a DataFrame to standard PAQ IDs.
`mean_responses`	Calculate the mean responses for each PAQ group.

PAQ ¶

PAQ(label: str, id: str)

Bases: Enum

Enumeration of Perceptual Attribute Questions (PAQ) names and IDs.

Initialize a PAQ enum member.

PARAMETER	DESCRIPTION
`label`	The descriptive label for the PAQ (e.g., 'pleasant'). TYPE: `str`
`id`	The standard identifier for the PAQ (e.g., 'PAQ1'). TYPE: `str`

Source code in src/soundscapy/surveys/survey_utils.py

def __init__(self, label: str, id: str) -> None:  # noqa: A002
    """
    Store the label and identifier on a PAQ enum member.

    Parameters
    ----------
    label
        The descriptive label for the PAQ (e.g., 'pleasant').
    id
        The standard identifier for the PAQ (e.g., 'PAQ1').

    """
    # The parameter name `id` shadows the builtin, which is what the A002
    # suppression on the signature is for.
    self.id = id
    self.label = label

PAQDfSchema ¶

Bases: DataFrameModel

Pandera schema for validating PAQ (Perceptual Attribute Questions) DataFrames.

This schema defines the expected structure and data types for DataFrames containing soundscape survey data with PAQ responses and associated metadata. It includes automatic column name coercion to standardize various input formats.

ATTRIBUTE	DESCRIPTION
`PAQ1-PAQ8`	Perceptual Attribute Question responses (1-8) on a Likert scale. Nullable to allow for missing responses. TYPE: `Series[float]`
`language`	Language code for the survey responses. Optional field. TYPE: `Series[str] \| None`
`location_id`	Identifier for the survey location. Optional field. TYPE: `Series[str] \| None`
`session_id`	Identifier for the survey session. Optional field. TYPE: `Series[str] \| None`
`group_id`	Identifier for the survey group. Optional field. TYPE: `Series[str] \| None`
`record_id`	Unique identifier for each survey record. Optional field. TYPE: `Series[str] \| None`

METHOD	DESCRIPTION
`column_name_coercion`	Coerce column names to standardized format for PAQ data.

column_name_coercion ¶

column_name_coercion(df: DataFrame) -> DataFrame

Coerce column names to standardized format for PAQ data.

This parser automatically renames columns to match the expected schema:

PAQ label names (e.g., 'pleasant') to PAQ IDs (e.g., 'PAQ1')
Legacy ID column names to lowercase snake_case format

PARAMETER	DESCRIPTION
`cls`	The schema class (automatically passed by pandera).
`df`	Input DataFrame with potentially non-standard column names. TYPE: `DataFrame`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with standardized column names.

Source code in src/soundscapy/surveys/survey_utils.py

@pa.dataframe_parser
def column_name_coercion(cls, df: DataFrame) -> DataFrame:  # noqa: N805
    """
    Rename incoming columns to the names the PAQ schema expects.

    Two groups of columns are renamed; all other columns are left untouched:

    - PAQ label names (e.g., 'pleasant') become PAQ IDs (e.g., 'PAQ1')
    - Legacy ID column names become lowercase snake_case names

    Parameters
    ----------
    cls
        The schema class (automatically passed by pandera).
    df
        Input DataFrame with potentially non-standard column names.

    Returns
    -------
    :
        DataFrame with standardized column names.

    """
    # Legacy CamelCase ID columns and their snake_case replacements.
    legacy_id_names = {
        "LocationID": "location_id",
        "SessionID": "session_id",
        "GroupID": "group_id",
        "RecordID": "record_id",
    }
    # Descriptive PAQ labels paired positionally with the standard PAQ IDs.
    label_to_id = dict(zip(PAQ_LABELS, PAQ_IDS, strict=False))

    return df.rename(columns=label_to_id | legacy_id_names)

LikertScale `dataclass` ¶

LikertScale(
    paq: Scale = (
        lambda: [
            "Strongly disagree",
            "Somewhat disagree",
            "Neutral",
            "Somewhat agree",
            "Strongly agree",
        ]
    )(),
    source: Scale = (
        lambda: [
            "Not at all",
            "A little",
            "Moderately",
            "A lot",
            "Dominates completely",
        ]
    )(),
    overall: Scale = (
        lambda: [
            "Very bad",
            "Bad",
            "Neither bad nor good",
            "Good",
            "Very good",
        ]
    )(),
    appropriate: Scale = (
        lambda: [
            "Not at all",
            "A little",
            "Moderately",
            "A lot",
            "Perfectly",
        ]
    )(),
    loud: Scale = (
        lambda: [
            "Not at all",
            "A little",
            "Moderately",
            "Very",
            "Extremely",
        ]
    )(),
    often: Scale = (
        lambda: [
            "Never / This is my first time here",
            "Rarely",
            "Sometimes",
            "Often",
            "Very often",
        ]
    )(),
    visit: Scale = (
        lambda: [
            "Never",
            "Rarely",
            "Sometimes",
            "Often",
            "Very often",
        ]
    )(),
)

Contains different Likert scale options for survey questions.

This class provides standardized 5-point Likert scales for questions commonly used in acoustic and soundscape surveys.

ATTRIBUTE	DESCRIPTION
`paq`	Agreement scale from "Strongly disagree" to "Strongly agree"
`source`	Source perception scale from "Not at all" to "Dominates completely"
`overall`	Quality assessment scale from "Very bad" to "Very good"
`appropriate`	Appropriateness scale from "Not at all" to "Perfectly"
`loud`	Loudness perception scale from "Not at all" to "Extremely"
`often`	Frequency scale with first-time option from "Never / This is my first time here" to "Very often"
`visit`	Standard frequency scale from "Never" to "Very often"

return_paqs ¶

return_paqs(
    df: DataFrame,
    other_cols: list[str] | None = None,
    *,
    incl_ids: bool = True,
) -> pd.DataFrame

Return only the PAQ columns from a DataFrame.

PARAMETER	DESCRIPTION
`df`	Input DataFrame containing PAQ data. TYPE: `DataFrame`
`other_cols`	Other columns to include in the output, by default None. TYPE: `list[str] \| None` DEFAULT: `None`
`incl_ids`	Whether to include ID columns (RecordID, GroupID, etc.), by default True. TYPE: `bool` DEFAULT: `True`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame containing only the PAQ columns and optionally ID and other specified columns.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'RecordID': [1, 2],
...     'PAQ1': [4, 3],
...     'PAQ2': [2, 5],
...     'PAQ3': [1, 2],
...     'PAQ4': [3, 4],
...     'PAQ5': [5, 1],
...     'PAQ6': [2, 3],
...     'PAQ7': [4, 5],
...     'PAQ8': [1, 2],
...     'OtherCol': ['A', 'B']
... })
>>> return_paqs(df)
   RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
0         1     4     2     1     3     5     2     4     1
1         2     3     5     2     4     1     3     5     2
>>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
   PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
0     4     2     1     3     5     2     4     1        A
1     3     5     2     4     1     3     5     2        B

Source code in src/soundscapy/surveys/survey_utils.py

def return_paqs(
    df: pd.DataFrame, other_cols: list[str] | None = None, *, incl_ids: bool = True
) -> pd.DataFrame:
    """
    Select the PAQ columns (plus optional extras) from a DataFrame.

    The resulting column order is: any recognised ID columns, then the PAQ
    columns, then `other_cols`.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    other_cols
        Other columns to include in the output, by default None.
    incl_ids
        Whether to include the ID columns (RecordID, GroupID, SessionID,
        LocationID) that are present in `df`, by default True.

    Returns
    -------
    :
        DataFrame containing only the PAQ columns and optionally ID and other specified
        columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'RecordID': [1, 2],
    ...     'PAQ1': [4, 3],
    ...     'PAQ2': [2, 5],
    ...     'PAQ3': [1, 2],
    ...     'PAQ4': [3, 4],
    ...     'PAQ5': [5, 1],
    ...     'PAQ6': [2, 3],
    ...     'PAQ7': [4, 5],
    ...     'PAQ8': [1, 2],
    ...     'OtherCol': ['A', 'B']
    ... })
    >>> return_paqs(df)
       RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
    0         1     4     2     1     3     5     2     4     1
    1         2     3     5     2     4     1     3     5     2
    >>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
       PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
    0     4     2     1     3     5     2     4     1        A
    1     3     5     2     4     1     3     5     2        B

    """
    known_id_names = ("RecordID", "GroupID", "SessionID", "LocationID")

    # Only ID columns that actually exist in the frame are selected.
    leading_ids = (
        [name for name in known_id_names if name in df.columns] if incl_ids else []
    )
    cols = [*leading_ids, *PAQ_IDS]

    if other_cols:
        cols += other_cols

    logger.debug(f"Returning PAQ columns: {cols}")
    return df[cols]

rename_paqs ¶

rename_paqs(
    df: DataFrame,
    paq_aliases: list | tuple | dict | None = None,
) -> pd.DataFrame

Rename the PAQ columns in a DataFrame to standard PAQ IDs.

PARAMETER	DESCRIPTION
`df`	Input DataFrame containing PAQ data. TYPE: `DataFrame`
`paq_aliases`	Specify which PAQs are to be renamed. If None, will check if the column names are in pre-defined options. If a tuple, the order must match PAQ_IDS. If a dict, keys are current names and values are desired PAQ IDs. TYPE: `list \| tuple \| dict \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with renamed PAQ columns.

RAISES	DESCRIPTION
`TypeError`	If paq_aliases is not a tuple, list, or dictionary.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'pleasant': [4, 3],
...     'vibrant': [2, 5],
...     'other_col': [1, 2]
... })
>>> rename_paqs(df)
   PAQ1  PAQ2  other_col
0     4     2          1
1     3     5          2
>>> df_custom = pd.DataFrame({
...     'pl': [4, 3],
...     'vb': [2, 5],
... })
>>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
   PAQ1  PAQ2
0     4     2
1     3     5

Source code in src/soundscapy/surveys/survey_utils.py

def rename_paqs(
    df: pd.DataFrame, paq_aliases: list | tuple | dict | None = None
) -> pd.DataFrame:
    """
    Rename the PAQ columns in a DataFrame to standard PAQ IDs.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    paq_aliases
        Specify which PAQs are to be renamed. If None, will check if the column names
        are in pre-defined options. If a tuple, the order must match PAQ_IDS.
        If a dict, keys are current names and values are desired PAQ IDs.

    Returns
    -------
    :
        DataFrame with renamed PAQ columns.

    Raises
    ------
    ValueError
        If paq_aliases is not a tuple, list, or dictionary.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'pleasant': [4, 3],
    ...     'vibrant': [2, 5],
    ...     'other_col': [1, 2]
    ... })
    >>> rename_paqs(df)
       PAQ1  PAQ2  other_col
    0     4     2          1
    1     3     5          2
    >>> df_custom = pd.DataFrame({
    ...     'pl': [4, 3],
    ...     'vb': [2, 5],
    ... })
    >>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
       PAQ1  PAQ2
    0     4     2
    1     3     5

    """
    if paq_aliases is None:
        if any(paq_id in df.columns for paq_id in PAQ_IDS):
            logger.info("PAQs already correctly named.")
            return df
        if any(paq_name in df.columns for paq_name in PAQ_LABELS):
            paq_aliases = PAQ_LABELS

    if isinstance(paq_aliases, list | tuple):
        rename_dict = dict(zip(paq_aliases, PAQ_IDS, strict=False))
    elif isinstance(paq_aliases, dict):
        rename_dict = paq_aliases
    else:
        msg = "paq_aliases must be a tuple, list, or dictionary."
        raise TypeError(msg)

    logger.debug(f"Renaming PAQs with the following mapping: {rename_dict}")
    return df.rename(columns=rename_dict)

mean_responses ¶

mean_responses(df: DataFrame, group: str) -> pd.DataFrame

Calculate the mean response to each PAQ within each group.

PARAMETER	DESCRIPTION
`df`	Input DataFrame containing PAQ data. TYPE: `DataFrame`
`group`	Column name to group by. TYPE: `str`

RETURNS	DESCRIPTION
`DataFrame`	DataFrame with mean responses for each PAQ group.

Source code in src/soundscapy/surveys/survey_utils.py

def mean_responses(df: pd.DataFrame, group: str) -> pd.DataFrame:
    """
    Average every PAQ column within each level of a grouping column.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    group
        Column name to group by.

    Returns
    -------
    :
        DataFrame holding the grouping column and the per-group mean of
        each PAQ column.

    """
    # Keep only the PAQ columns plus the grouping column (no ID columns),
    # so nothing else is averaged.
    paq_with_group = return_paqs(df, incl_ids=False, other_cols=[group])
    group_means = paq_with_group.groupby(group).mean()
    # Move the group labels from the index back into an ordinary column.
    return group_means.reset_index()

Surveys¶

soundscapy.surveys ¶

add_iso_coords ¶

calculate_iso_coords ¶

ipsatize ¶

return_paqs ¶

simulation ¶

rename_paqs ¶

Processing¶

soundscapy.surveys.processing ¶

ISOCoordinates dataclass ¶

SSMMetrics dataclass ¶

table ¶

calculate_iso_coords ¶

add_iso_coords ¶

likert_data_quality ¶

simulation ¶

ssm_metrics ¶

ssm_cosine_fit ¶

ipsatize ¶

Survey utilities¶

soundscapy.surveys.survey_utils ¶

PAQ ¶

PAQDfSchema ¶

column_name_coercion ¶

LikertScale dataclass ¶

return_paqs ¶

rename_paqs ¶

mean_responses ¶

ISOCoordinates `dataclass` ¶

SSMMetrics `dataclass` ¶

LikertScale `dataclass` ¶