Skip to content

Surveys

soundscapy.surveys

Soundscapy Surveys Package.

This package handles the processing and analysis of soundscape surveys, including PAQ (Perceived Affective Quality) data and ISO coordinate calculations.

MODULE DESCRIPTION
processing

Soundscape survey data processing module.

survey_utils

Core utility functions for processing soundscape survey data.

FUNCTION DESCRIPTION
add_iso_coords

Calculate and add ISO coordinates as new columns in the DataFrame.

calculate_iso_coords

Calculate the projected ISOPleasant and ISOEventful coordinates.

ipsatize

Participant-level ipsatization for circumplex analysis.

return_paqs

Return only the PAQ columns from a DataFrame.

simulation

Generate random PAQ responses for simulation purposes.

rename_paqs

Rename the PAQ columns in a DataFrame to standard PAQ IDs.

add_iso_coords

add_iso_coords(
    data: DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame

Calculate and add ISO coordinates as new columns in the DataFrame.

PARAMETER DESCRIPTION
data

Input DataFrame containing PAQ data

TYPE: DataFrame

val_range

(min, max) range of original PAQ responses, by default (1, 5)

TYPE: tuple[int, int] DEFAULT: (1, 5)

names

Names for new coordinate columns, by default ("ISOPleasant", "ISOEventful")

TYPE: tuple[str, str] DEFAULT: ('ISOPleasant', 'ISOEventful')

angles

Angles for each PAQ in degrees, by default EQUAL_ANGLES

TYPE: tuple[int, ...] DEFAULT: EQUAL_ANGLES

overwrite

Whether to overwrite existing ISO coordinate columns, by default False

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
DataFrame

DataFrame with new ISO coordinate columns added

RAISES DESCRIPTION
Warning

If ISO coordinate columns already exist and overwrite is False

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> df_with_iso = add_iso_coords(df)
>>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
   ISOPleasant  ISOEventful
0        -0.03        -0.28
1         0.47         0.18
Source code in src/soundscapy/surveys/processing.py
def add_iso_coords(
    data: pd.DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame:
    """
    Compute the ISO coordinates and attach them to the survey DataFrame.

    The coordinates are obtained from `calculate_iso_coords` and stored under
    the two column names given in `names`. The input DataFrame is not modified
    in place; a new DataFrame is returned.

    Parameters
    ----------
    data
        Input DataFrame containing PAQ data
    val_range
        (min, max) range of original PAQ responses, by default (1, 5)
    names
        Column names for the pleasant and eventful coordinates (in that order),
        by default ("ISOPleasant", "ISOEventful")
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES
    overwrite
        When True, existing columns named in `names` are dropped and
        recomputed; when False (default) their presence is an error.

    Returns
    -------
    :
        DataFrame with the two ISO coordinate columns added

    Raises
    ------
    Warning
        If a column listed in `names` already exists and `overwrite` is False.
        Note that this is *raised* as an exception, not emitted through the
        `warnings` module.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> df_with_iso = add_iso_coords(df)
    >>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
       ISOPleasant  ISOEventful
    0        -0.03        -0.28
    1         0.47         0.18

    """
    for column in names:
        if column not in data.columns:
            continue
        if not overwrite:
            msg = (
                f"{column} already in dataframe. Use `overwrite=True` to replace it."
            )
            raise Warning(msg)
        # Remove the stale column so the freshly computed values replace it.
        data = data.drop(columns=column)

    pleasant, eventful = calculate_iso_coords(
        data, val_range=val_range, angles=angles
    )
    new_columns = {names[0]: pleasant, names[1]: eventful}
    result = data.assign(**new_columns)

    logger.info(f"Added ISO coordinates to DataFrame with column names: {names}")
    return result

calculate_iso_coords

calculate_iso_coords(
    results_df: DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]

Calculate the projected ISOPleasant and ISOEventful coordinates.

PARAMETER DESCRIPTION
results_df

DataFrame containing PAQ data.

TYPE: DataFrame

val_range

(max, min) range of original PAQ responses, by default (5, 1)

TYPE: tuple[int, int] DEFAULT: (5, 1)

angles

Angles for each PAQ in degrees, by default EQUAL_ANGLES

TYPE: tuple[int, ...] DEFAULT: EQUAL_ANGLES

RETURNS DESCRIPTION
tuple[Series, Series]

ISOPleasant and ISOEventful coordinate values

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
>>> iso_pleasant.round(2)
0   -0.03
1    0.47
dtype: float64
>>> iso_eventful.round(2)
0   -0.28
1    0.18
dtype: float64
Source code in src/soundscapy/surveys/processing.py
def calculate_iso_coords(
    results_df: pd.DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]:
    """
    Project PAQ responses onto the ISOPleasant and ISOEventful axes.

    The PAQ columns are extracted with `return_paqs` (ID columns excluded) and
    each row is passed to the `_adj_iso_pl` / `_adj_iso_ev` helpers.

    Parameters
    ----------
    results_df
        DataFrame containing PAQ data.
    val_range
        Range of the original PAQ responses, by default (5, 1). Only the
        difference between the largest and smallest entry is used, so the
        order of the two values does not matter.
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES

    Returns
    -------
    :
        ISOPleasant and ISOEventful coordinate values, one entry per row

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
    >>> iso_pleasant.round(2)
    0   -0.03
    1    0.47
    dtype: float64
    >>> iso_eventful.round(2)
    0   -0.28
    1    0.18
    dtype: float64

    """
    # Width of the response scale, independent of the order of val_range.
    span = max(val_range) - min(val_range)

    responses = return_paqs(results_df, incl_ids=False)

    # Both helpers take (row, angles, scale); the extra arguments are
    # forwarded positionally by DataFrame.apply.
    helper_args = (angles, span)
    pleasant = responses.apply(_adj_iso_pl, axis=1, args=helper_args)
    eventful = responses.apply(_adj_iso_ev, axis=1, args=helper_args)

    logger.info(f"Calculated ISO coordinates for {len(results_df)} samples")
    return pleasant, eventful

ipsatize

ipsatize(
    data: DataFrame,
    method: Literal[
        "grand_mean", "column_wise", "row_wise"
    ] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame

Participant-level ipsatization for circumplex analysis.

Removes systematic response biases before computing a correlation matrix. The choice of method depends on the study design and the type of bias being corrected.

PARAMETER DESCRIPTION
data

DataFrame containing PAQ scale columns and (for participant-level methods) a grouping column.

TYPE: DataFrame

method

Centering strategy:

"grand_mean" (default) — one scalar per participant: the mean across all PAQ values and all observations for that participant. Removes overall response-level differences between participants. Matches the published SATP analysis (Aletta et al., 2024) and the original R implementation.

"column_wise" — eight scalars per participant: the per-scale mean across that participant's observations. Removes scale-specific response biases. This is the behaviour of the legacy :func:person_center function.

"row_wise" — one scalar per observation: the mean across all PAQ scales within that observation. Removes the general impression of each individual soundscape stimulus. Equivalent to circumplex.ipsatize().

TYPE: Literal['grand_mean', 'column_wise', 'row_wise'] DEFAULT: 'grand_mean'

participant_col

Column used to group observations by participant. Required for "grand_mean" and "column_wise"; ignored for "row_wise".

TYPE: str DEFAULT: 'participant'

scales

PAQ column names to centre. Defaults to :data:PAQ_IDS when None.

TYPE: list[str] | None DEFAULT: None

RETURNS DESCRIPTION
DataFrame

DataFrame containing only the scale columns with centred values. The participant_col grouping column is excluded from the result.

RAISES DESCRIPTION
KeyError

If participant_col is not present in data when method is "grand_mean" or "column_wise".

Examples:

>>> import pandas as pd
>>> data = pd.DataFrame({
...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
...     'participant': ['A', 'A', 'B', 'B'],
... })
>>> result = ipsatize(data, method="grand_mean")
>>> result['PAQ1'].tolist()
[-5.0, 5.0, 5.0, -5.0]
Source code in src/soundscapy/surveys/processing.py
def ipsatize(
    data: pd.DataFrame,
    method: Literal["grand_mean", "column_wise", "row_wise"] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame:
    """
    Participant-level ipsatization for circumplex analysis.

    Removes systematic response biases before computing a correlation matrix.
    The choice of method depends on the study design and the type of bias
    being corrected.

    Parameters
    ----------
    data
        DataFrame containing PAQ scale columns and (for participant-level
        methods) a grouping column.
    method
        Centering strategy:

        ``"grand_mean"`` *(default)* — one scalar per participant: the mean
        across *all* PAQ values and *all* observations for that participant.
        Removes overall response-level differences between participants.
        **Matches the published SATP analysis (Aletta et al., 2024) and the
        original R implementation.**

        ``"column_wise"`` — eight scalars per participant: the per-scale mean
        across that participant's observations.  Removes scale-specific
        response biases.  This is the behaviour of the legacy
        :func:`person_center` function.

        ``"row_wise"`` — one scalar per observation: the mean across all PAQ
        scales within that observation.  Removes the general impression of
        each individual soundscape stimulus.  Equivalent to
        ``circumplex.ipsatize()``.
    participant_col
        Column used to group observations by participant.  Required for
        ``"grand_mean"`` and ``"column_wise"``; ignored for ``"row_wise"``.
    scales
        PAQ column names to centre.  Defaults to :data:`PAQ_IDS` when
        ``None``.

    Returns
    -------
    :
        DataFrame containing only the scale columns with centred values.
        The ``participant_col`` grouping column is excluded from the result.

    Raises
    ------
    KeyError
        If ``participant_col`` is not present in ``data`` when
        ``method`` is ``"grand_mean"`` or ``"column_wise"``.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({
    ...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
    ...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
    ...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
    ...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
    ...     'participant': ['A', 'A', 'B', 'B'],
    ... })
    >>> result = ipsatize(data, method="grand_mean")
    >>> result['PAQ1'].tolist()
    [-5.0, 5.0, 5.0, -5.0]

    """
    _scales = scales if scales is not None else PAQ_IDS

    if method == "column_wise":
        means = data.groupby(participant_col)[_scales].transform("mean")
        return data[_scales] - means

    if method == "grand_mean":
        # Compute a single scalar per participant: mean across all PAQ values
        # and all observations for that participant.  Use nanmean so that
        # participants with partial NaN data still get a valid grand mean
        # computed from their non-NaN values; NaN rows are then removed by
        # downstream listwise deletion rather than silently expanding data loss
        # to the whole participant.
        grand_means = data.groupby(participant_col)[_scales].apply(
            lambda df: float(np.nanmean(df.values))
        )
        grand_mean_per_row = data[participant_col].map(grand_means)
        return data[_scales].subtract(grand_mean_per_row, axis=0)

    if method == "row_wise":
        row_means = data[_scales].mean(axis=1)
        return data[_scales].sub(row_means, axis=0)

    msg = f"method must be 'grand_mean', 'column_wise', or 'row_wise'; got {method!r}"
    raise ValueError(msg)

return_paqs

return_paqs(
    df: DataFrame,
    other_cols: list[str] | None = None,
    *,
    incl_ids: bool = True,
) -> pd.DataFrame

Return only the PAQ columns from a DataFrame.

PARAMETER DESCRIPTION
df

Input DataFrame containing PAQ data.

TYPE: DataFrame

other_cols

Other columns to include in the output, by default None.

TYPE: list[str] | None DEFAULT: None

incl_ids

Whether to include ID columns (RecordID, GroupID, etc.), by default True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
DataFrame

DataFrame containing only the PAQ columns and optionally ID and other specified columns.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'RecordID': [1, 2],
...     'PAQ1': [4, 3],
...     'PAQ2': [2, 5],
...     'PAQ3': [1, 2],
...     'PAQ4': [3, 4],
...     'PAQ5': [5, 1],
...     'PAQ6': [2, 3],
...     'PAQ7': [4, 5],
...     'PAQ8': [1, 2],
...     'OtherCol': ['A', 'B']
... })
>>> return_paqs(df)
   RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
0         1     4     2     1     3     5     2     4     1
1         2     3     5     2     4     1     3     5     2
>>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
   PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
0     4     2     1     3     5     2     4     1        A
1     3     5     2     4     1     3     5     2        B
Source code in src/soundscapy/surveys/survey_utils.py
def return_paqs(
    df: pd.DataFrame, other_cols: list[str] | None = None, *, incl_ids: bool = True
) -> pd.DataFrame:
    """
    Select the PAQ columns (plus optional extras) from a DataFrame.

    The output column order is: ID columns, PAQ columns, then `other_cols`.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data. Every column in `PAQ_IDS`, and
        every name in `other_cols`, is selected unconditionally and therefore
        has to be present.
    other_cols
        Other columns to include in the output, by default None.
    incl_ids
        Whether to include the ID columns (RecordID, GroupID, SessionID,
        LocationID), by default True. Only the ID columns that actually exist
        in `df` are included.

    Returns
    -------
    :
        DataFrame containing only the PAQ columns and optionally ID and other
        specified columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'RecordID': [1, 2],
    ...     'PAQ1': [4, 3],
    ...     'PAQ2': [2, 5],
    ...     'PAQ3': [1, 2],
    ...     'PAQ4': [3, 4],
    ...     'PAQ5': [5, 1],
    ...     'PAQ6': [2, 3],
    ...     'PAQ7': [4, 5],
    ...     'PAQ8': [1, 2],
    ...     'OtherCol': ['A', 'B']
    ... })
    >>> return_paqs(df)
       RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
    0         1     4     2     1     3     5     2     4     1
    1         2     3     5     2     4     1     3     5     2
    >>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
       PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
    0     4     2     1     3     5     2     4     1        A
    1     3     5     2     4     1     3     5     2        B

    """
    # Build a fresh list so the module-level PAQ_IDS is never mutated.
    selected: list[str] = []

    if incl_ids:
        known_ids = ("RecordID", "GroupID", "SessionID", "LocationID")
        selected += [label for label in known_ids if label in df.columns]

    selected += PAQ_IDS

    if other_cols:
        selected += other_cols

    logger.debug(f"Returning PAQ columns: {selected}")
    return df[selected]

simulation

simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame

Generate random PAQ responses for simulation purposes.

PARAMETER DESCRIPTION
n

Number of samples to simulate, by default 3000

TYPE: int DEFAULT: 3000

val_range

Range of values for PAQ responses, by default (1, 5)

TYPE: tuple[int, int] DEFAULT: (1, 5)

seed

Optional random seed for deterministic output, by default None

TYPE: int | None DEFAULT: None

incl_iso_coords

Whether to add calculated ISO coordinates, by default False

TYPE: bool DEFAULT: False

**coord_kwargs

Optional keyword arguments passed directly to the add_iso_coords function if incl_iso_coords is True. These can include:

  • names (tuple[str, str]): Names for the new ISO coordinate columns.
  • angles (tuple[int, ...]): Angles for each PAQ used in calculation.
  • overwrite (bool): Whether to overwrite existing ISO coordinate columns.

TYPE: Unpack[_AddISOCoordsKwargs] DEFAULT: {}

RETURNS DESCRIPTION
DataFrame

DataFrame of randomly generated PAQ responses

Examples:

>>> data = simulation(n=5,incl_iso_coords=True)
>>> data.shape
(5, 10)
>>> list(data.columns)
['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']
Source code in src/soundscapy/surveys/processing.py
def simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame:
    """
    Build a DataFrame of randomly drawn PAQ responses.

    Each of the eight PAQ columns is filled with random integers between the
    smallest and the largest value of `val_range`, both inclusive.

    Parameters
    ----------
    n
        Number of samples (rows) to simulate, by default 3000
    val_range
        Range of values for PAQ responses, by default (1, 5)
    seed
        Optional random seed for deterministic output, by default None
    incl_iso_coords
        Whether to add calculated ISO coordinates, by default False
    **coord_kwargs
        Optional keyword arguments passed directly to the `add_iso_coords` function
        if `incl_iso_coords` is True. These can include:

        - `names` (tuple[str, str]): Names for the new ISO coordinate columns.
        - `angles` (tuple[int, ...]): Angles for each PAQ used in calculation.
        - `overwrite` (bool): Whether to overwrite existing ISO coordinate columns.

    Returns
    -------
    :
        DataFrame of randomly generated PAQ responses

    Examples
    --------
    >>> data = simulation(n=5,incl_iso_coords=True)
    >>> data.shape
    (5, 10)
    >>> list(data.columns)
    ['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']

    """  # noqa: E501
    rng = np.random.default_rng(seed)
    lowest = min(val_range)
    # Generator.integers excludes the upper bound, hence the +1.
    upper_exclusive = max(val_range) + 1
    responses = rng.integers(lowest, upper_exclusive, size=(n, 8))

    simulated = pd.DataFrame(responses, columns=PAQ_IDS)
    if incl_iso_coords:
        simulated = add_iso_coords(simulated, val_range=val_range, **coord_kwargs)

    logger.info(f"Generated simulated PAQ data with {n} samples")
    return simulated

rename_paqs

rename_paqs(
    df: DataFrame,
    paq_aliases: list | tuple | dict | None = None,
) -> pd.DataFrame

Rename the PAQ columns in a DataFrame to standard PAQ IDs.

PARAMETER DESCRIPTION
df

Input DataFrame containing PAQ data.

TYPE: DataFrame

paq_aliases

Specify which PAQs are to be renamed. If None, will check if the column names are in pre-defined options. If a list or tuple, the order must match PAQ_IDS. If a dict, keys are current names and values are desired PAQ IDs.

TYPE: list | tuple | dict | None DEFAULT: None

RETURNS DESCRIPTION
DataFrame

DataFrame with renamed PAQ columns.

RAISES DESCRIPTION
TypeError

If paq_aliases is not a tuple, list, or dictionary.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'pleasant': [4, 3],
...     'vibrant': [2, 5],
...     'other_col': [1, 2]
... })
>>> rename_paqs(df)
   PAQ1  PAQ2  other_col
0     4     2          1
1     3     5          2
>>> df_custom = pd.DataFrame({
...     'pl': [4, 3],
...     'vb': [2, 5],
... })
>>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
   PAQ1  PAQ2
0     4     2
1     3     5
Source code in src/soundscapy/surveys/survey_utils.py
def rename_paqs(
    df: pd.DataFrame, paq_aliases: list | tuple | dict | None = None
) -> pd.DataFrame:
    """
    Rename the PAQ columns in a DataFrame to standard PAQ IDs.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    paq_aliases
        Specify which PAQs are to be renamed. If None, will check if the column names
        are in pre-defined options. If a tuple, the order must match PAQ_IDS.
        If a dict, keys are current names and values are desired PAQ IDs.

    Returns
    -------
    :
        DataFrame with renamed PAQ columns.

    Raises
    ------
    ValueError
        If paq_aliases is not a tuple, list, or dictionary.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'pleasant': [4, 3],
    ...     'vibrant': [2, 5],
    ...     'other_col': [1, 2]
    ... })
    >>> rename_paqs(df)
       PAQ1  PAQ2  other_col
    0     4     2          1
    1     3     5          2
    >>> df_custom = pd.DataFrame({
    ...     'pl': [4, 3],
    ...     'vb': [2, 5],
    ... })
    >>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
       PAQ1  PAQ2
    0     4     2
    1     3     5

    """
    if paq_aliases is None:
        if any(paq_id in df.columns for paq_id in PAQ_IDS):
            logger.info("PAQs already correctly named.")
            return df
        if any(paq_name in df.columns for paq_name in PAQ_LABELS):
            paq_aliases = PAQ_LABELS

    if isinstance(paq_aliases, list | tuple):
        rename_dict = dict(zip(paq_aliases, PAQ_IDS, strict=False))
    elif isinstance(paq_aliases, dict):
        rename_dict = paq_aliases
    else:
        msg = "paq_aliases must be a tuple, list, or dictionary."
        raise TypeError(msg)

    logger.debug(f"Renaming PAQs with the following mapping: {rename_dict}")
    return df.rename(columns=rename_dict)

Processing

soundscapy.surveys.processing

Soundscape survey data processing module.

This module contains functions for processing and analyzing soundscape survey data, including ISO coordinate calculations, data quality checks, and SSM metrics.

Notes

The functions in this module are designed to be fairly general and can be used with any dataset in a similar format to the ISD. The key to this is using a simple dataframe/sheet with the following columns:

  • Index columns: e.g. LocationID, RecordID, GroupID, SessionID
  • Perceptual attributes: PAQ1, PAQ2, ..., PAQ8
  • Independent variables: e.g. Laeq, N5, Sharpness, etc.

The key functions of this module are designed to clean/validate datasets, calculate ISO coordinate values or SSM metrics, and filter on index columns. Functions and operations which are specific to a particular dataset are located in their own modules under soundscapy.databases.

CLASS DESCRIPTION
ISOCoordinates

Dataclass for storing ISO coordinates.

SSMMetrics

Dataclass for storing Structural Summary Method (SSM) metrics.

FUNCTION DESCRIPTION
calculate_iso_coords

Calculate the projected ISOPleasant and ISOEventful coordinates.

add_iso_coords

Calculate and add ISO coordinates as new columns in the DataFrame.

likert_data_quality

Perform basic quality checks on PAQ (Likert scale) data.

simulation

Generate random PAQ responses for simulation purposes.

ssm_metrics

Calculate the Structural Summary Method (SSM) metrics for each response.

ssm_cosine_fit

Fit a cosine model to the PAQ data for SSM analysis.

ipsatize

Participant-level ipsatization for circumplex analysis.

ISOCoordinates dataclass

ISOCoordinates(pleasant: float, eventful: float)

Dataclass for storing ISO coordinates.

SSMMetrics dataclass

SSMMetrics(
    amplitude: float,
    angle: float,
    elevation: float,
    displacement: float,
    r_squared: float,
)

Dataclass for storing Structural Summary Method (SSM) metrics.

METHOD DESCRIPTION
table

Generate a pandas Series containing specific attributes of the instance.

table
table() -> pd.Series

Generate a pandas Series containing specific attributes of the instance.

This method collects the values of the instance attributes related to amplitude, angle, elevation, displacement, and r_squared, and organizes them into a pandas Series. It is useful for presenting the data in a structured format suitable for further processing or analysis.

RETURNS DESCRIPTION
Series

A pandas Series containing the following key-value pairs:

  • "amplitude": instance attribute representing a certain magnitude.
  • "angle": instance attribute representing a specific angular measurement.
  • "elevation": instance attribute indicating a height or vertical position.
  • "displacement": instance attribute defining the movement or shift.
  • "r_squared": instance attribute denoting coefficient of determination.
Source code in src/soundscapy/surveys/processing.py
def table(self) -> pd.Series:
    """
    Collect the SSM metric attributes of this instance into a pandas Series.

    Handy when the metrics should be displayed or combined with other
    results in tabular form.

    Returns
    -------
    :
        A pandas Series indexed by attribute name, holding the current value
        of each of the following attributes, in this order:

        - "amplitude"
        - "angle"
        - "elevation"
        - "displacement"
        - "r_squared"

    """
    metric_names = ("amplitude", "angle", "elevation", "displacement", "r_squared")
    return pd.Series({metric: getattr(self, metric) for metric in metric_names})

calculate_iso_coords

calculate_iso_coords(
    results_df: DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]

Calculate the projected ISOPleasant and ISOEventful coordinates.

PARAMETER DESCRIPTION
results_df

DataFrame containing PAQ data.

TYPE: DataFrame

val_range

(max, min) range of original PAQ responses, by default (5, 1)

TYPE: tuple[int, int] DEFAULT: (5, 1)

angles

Angles for each PAQ in degrees, by default EQUAL_ANGLES

TYPE: tuple[int, ...] DEFAULT: EQUAL_ANGLES

RETURNS DESCRIPTION
tuple[Series, Series]

ISOPleasant and ISOEventful coordinate values

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
>>> iso_pleasant.round(2)
0   -0.03
1    0.47
dtype: float64
>>> iso_eventful.round(2)
0   -0.28
1    0.18
dtype: float64
Source code in src/soundscapy/surveys/processing.py
def calculate_iso_coords(
    results_df: pd.DataFrame,
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> tuple[pd.Series, pd.Series]:
    """
    Project PAQ responses onto the ISOPleasant and ISOEventful axes.

    The PAQ columns are extracted with `return_paqs` (ID columns excluded) and
    each row is passed to the `_adj_iso_pl` / `_adj_iso_ev` helpers.

    Parameters
    ----------
    results_df
        DataFrame containing PAQ data.
    val_range
        Range of the original PAQ responses, by default (5, 1). Only the
        difference between the largest and smallest entry is used, so the
        order of the two values does not matter.
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES

    Returns
    -------
    :
        ISOPleasant and ISOEventful coordinate values, one entry per row

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> iso_pleasant, iso_eventful = calculate_iso_coords(df)
    >>> iso_pleasant.round(2)
    0   -0.03
    1    0.47
    dtype: float64
    >>> iso_eventful.round(2)
    0   -0.28
    1    0.18
    dtype: float64

    """
    # Width of the response scale, independent of the order of val_range.
    span = max(val_range) - min(val_range)

    responses = return_paqs(results_df, incl_ids=False)

    # Both helpers take (row, angles, scale); the extra arguments are
    # forwarded positionally by DataFrame.apply.
    helper_args = (angles, span)
    pleasant = responses.apply(_adj_iso_pl, axis=1, args=helper_args)
    eventful = responses.apply(_adj_iso_ev, axis=1, args=helper_args)

    logger.info(f"Calculated ISO coordinates for {len(results_df)} samples")
    return pleasant, eventful

add_iso_coords

add_iso_coords(
    data: DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame

Calculate and add ISO coordinates as new columns in the DataFrame.

PARAMETER DESCRIPTION
data

Input DataFrame containing PAQ data

TYPE: DataFrame

val_range

(min, max) range of original PAQ responses, by default (1, 5)

TYPE: tuple[int, int] DEFAULT: (1, 5)

names

Names for new coordinate columns, by default ("ISOPleasant", "ISOEventful")

TYPE: tuple[str, str] DEFAULT: ('ISOPleasant', 'ISOEventful')

angles

Angles for each PAQ in degrees, by default EQUAL_ANGLES

TYPE: tuple[int, ...] DEFAULT: EQUAL_ANGLES

overwrite

Whether to overwrite existing ISO coordinate columns, by default False

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
DataFrame

DataFrame with new ISO coordinate columns added

RAISES DESCRIPTION
Warning

If ISO coordinate columns already exist and overwrite is False

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> df_with_iso = add_iso_coords(df)
>>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
   ISOPleasant  ISOEventful
0        -0.03        -0.28
1         0.47         0.18
Source code in src/soundscapy/surveys/processing.py
def add_iso_coords(
    data: pd.DataFrame,
    val_range: tuple[int, int] = (1, 5),
    names: tuple[str, str] = ("ISOPleasant", "ISOEventful"),
    angles: tuple[int, ...] = EQUAL_ANGLES,
    *,
    overwrite: bool = False,
) -> pd.DataFrame:
    """
    Append the two ISO coordinate columns to a copy of a PAQ DataFrame.

    The coordinates are obtained from `calculate_iso_coords` and attached
    under the column names given in `names`. The caller's DataFrame object is
    not modified; a new DataFrame is returned.

    Parameters
    ----------
    data
        Input DataFrame containing PAQ data
    val_range
        (min, max) range of original PAQ responses, by default (1, 5)
    names
        Column names for the pleasant and eventful coordinates (in that
        order), by default ("ISOPleasant", "ISOEventful")
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES
    overwrite
        When True, existing columns named in `names` are dropped and
        recalculated; when False their presence is an error, by default False

    Returns
    -------
    :
        DataFrame with the ISO coordinate columns added

    Raises
    ------
    Warning
        If a column named in `names` already exists and overwrite is False

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> df_with_iso = add_iso_coords(df)
    >>> df_with_iso[['ISOPleasant', 'ISOEventful']].round(2)
       ISOPleasant  ISOEventful
    0        -0.03        -0.28
    1         0.47         0.18

    """
    for col in names:
        if col not in data.columns:
            continue
        if not overwrite:
            # Refuse to clobber an existing column unless explicitly asked to.
            msg = f"{col} already in dataframe. Use `overwrite=True` to replace it."
            raise Warning(msg)
        data = data.drop(columns=col)

    pleasant, eventful = calculate_iso_coords(
        data, val_range=val_range, angles=angles
    )
    new_columns = {names[0]: pleasant, names[1]: eventful}
    result = data.assign(**new_columns)

    logger.info(f"Added ISO coordinates to DataFrame with column names: {names}")
    return result

likert_data_quality

likert_data_quality(
    df: DataFrame,
    val_range: tuple[int, int] = (1, 5),
    *,
    allow_na: bool = False,
) -> list[int] | None

Perform basic quality checks on PAQ (Likert scale) data.

PARAMETER DESCRIPTION
df

DataFrame containing PAQ data

TYPE: DataFrame

allow_na

Whether to allow NaN values in PAQ data, by default False

TYPE: bool DEFAULT: False

val_range

Valid range for PAQ values, by default (1, 5)

TYPE: tuple[int, int] DEFAULT: (1, 5)

RETURNS DESCRIPTION
list[int] | None

Index labels of the rows that failed the quality checks, or None if every row passed.

Examples:

>>> import pandas as pd
>>> import numpy as np
>>> df = pd.DataFrame({
...     'PAQ1': [np.nan, 2, 3, 3], 'PAQ2': [3, 2, 6, 3], 'PAQ3': [2, 2, 3, 3],
...     'PAQ4': [1, 2, 3, 3], 'PAQ5': [5, 2, 3, 3], 'PAQ6': [3, 2, 3, 3],
...     'PAQ7': [4, 2, 3, 3], 'PAQ8': [2, 2, 3, 3]
... })
>>> likert_data_quality(df)
[0, 1, 2]
>>> likert_data_quality(df,allow_na=True)
[1, 2]
Source code in src/soundscapy/surveys/processing.py
def likert_data_quality(
    df: pd.DataFrame, val_range: tuple[int, int] = (1, 5), *, allow_na: bool = False
) -> list[int] | None:
    """
    Perform basic quality checks on PAQ (Likert scale) data.

    A row is flagged when any of the following holds:

    - it contains a NaN and ``allow_na`` is False;
    - it has no NaN and at least one value lies outside ``val_range``;
    - it has no NaN, every PAQ holds the same value, and that value is not
      the midpoint of ``val_range`` (e.g. all 2s on a 1-5 scale).

    Rows that contain a NaN are never range- or constancy-checked, so with
    ``allow_na=True`` they are accepted as they are.

    Parameters
    ----------
    df
        DataFrame containing PAQ data
    allow_na
        Whether to allow NaN values in PAQ data, by default False
    val_range
        Valid range for PAQ values, by default (1, 5)

    Returns
    -------
    :
        Index labels of the flagged rows, or None when no row is flagged.
        String labels that spell an integer are returned as ``int``; any other
        label is returned unchanged.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> df = pd.DataFrame({
    ...     'PAQ1': [np.nan, 2, 3, 3], 'PAQ2': [3, 2, 6, 3], 'PAQ3': [2, 2, 3, 3],
    ...     'PAQ4': [1, 2, 3, 3], 'PAQ5': [5, 2, 3, 3], 'PAQ6': [3, 2, 3, 3],
    ...     'PAQ7': [4, 2, 3, 3], 'PAQ8': [2, 2, 3, 3]
    ... })
    >>> likert_data_quality(df)
    [0, 1, 2]
    >>> likert_data_quality(df,allow_na=True)
    [1, 2]

    """
    paqs = return_paqs(df, incl_ids=False)

    # The bounds do not change between rows, so compute them once.
    lower, upper = min(val_range), max(val_range)
    midpoint = np.mean(val_range)

    invalid_indices = []

    for idx, row in paqs.iterrows():
        if isinstance(idx, str):
            try:
                # Numeric-looking string labels are reported as ints.
                row_idx = int(idx)
            except ValueError:
                # Non-numeric labels (e.g. "R001") cannot be converted;
                # report the label itself instead of crashing the check.
                row_idx = idx
        else:
            row_idx = idx

        row_array = row.to_numpy()
        is_constant = row_array.shape[0] > 0 and (row_array[0] == row_array).all()

        has_disallowed_na = not allow_na and row.isna().any()
        if has_disallowed_na or (
            row.notna().all()
            and (
                row.min() < lower
                or row.max() > upper
                # A constant row is only plausible at the scale midpoint.
                or (is_constant and row.iloc[0] != midpoint)
            )
        ):
            invalid_indices.append(row_idx)

    if invalid_indices:
        logger.info(f"Found {len(invalid_indices)} samples with data quality issues")
        return invalid_indices

    logger.info("PAQ data quality check passed")
    return None

simulation

simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame

Generate random PAQ responses for simulation purposes.

PARAMETER DESCRIPTION
n

Number of samples to simulate, by default 3000

TYPE: int DEFAULT: 3000

val_range

Range of values for PAQ responses, by default (1, 5)

TYPE: tuple[int, int] DEFAULT: (1, 5)

seed

Optional random seed for deterministic output, by default None

TYPE: int | None DEFAULT: None

incl_iso_coords

Whether to add calculated ISO coordinates, by default False

TYPE: bool DEFAULT: False

**coord_kwargs

Optional keyword arguments passed directly to the add_iso_coords function if incl_iso_coords is True. These can include:

  • names (tuple[str, str]): Names for the new ISO coordinate columns.
  • angles (tuple[int, ...]): Angles for each PAQ used in calculation.
  • overwrite (bool): Whether to overwrite existing ISO coordinate columns.

TYPE: Unpack[_AddISOCoordsKwargs] DEFAULT: {}

RETURNS DESCRIPTION
DataFrame

DataFrame of randomly generated PAQ responses

Examples:

>>> data = simulation(n=5,incl_iso_coords=True)
>>> data.shape
(5, 10)
>>> list(data.columns)
['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']
Source code in src/soundscapy/surveys/processing.py
def simulation(
    n: int = 3000,
    val_range: tuple[int, int] = (1, 5),
    *,
    seed: int | None = None,
    incl_iso_coords: bool = False,
    **coord_kwargs: Unpack[_AddISOCoordsKwargs],
) -> pd.DataFrame:
    """
    Draw random integer PAQ responses for simulation purposes.

    Each of the eight PAQ columns is filled with integers drawn from the
    inclusive range given by `val_range`, using a NumPy `default_rng`
    generator seeded with `seed`.

    Parameters
    ----------
    n
        Number of samples (rows) to simulate, by default 3000
    val_range
        Inclusive range of values for PAQ responses, by default (1, 5)
    seed
        Optional random seed for deterministic output, by default None
    incl_iso_coords
        Whether to add calculated ISO coordinates, by default False
    **coord_kwargs
        Optional keyword arguments passed directly to the `add_iso_coords` function
        if `incl_iso_coords` is True. These can include:

        - `names` (tuple[str, str]): Names for the new ISO coordinate columns.
        - `angles` (tuple[int, ...]): Angles for each PAQ used in calculation.
        - `overwrite` (bool): Whether to overwrite existing ISO coordinate columns.

    Returns
    -------
    :
        DataFrame of randomly generated PAQ responses

    Examples
    --------
    >>> data = simulation(n=5,incl_iso_coords=True)
    >>> data.shape
    (5, 10)
    >>> list(data.columns)
    ['PAQ1', 'PAQ2', 'PAQ3', 'PAQ4', 'PAQ5', 'PAQ6', 'PAQ7', 'PAQ8', 'ISOPleasant', 'ISOEventful']

    """  # noqa: E501
    rng = np.random.default_rng(seed)
    lowest, highest = min(val_range), max(val_range)
    # `integers` excludes the upper bound, hence the + 1.
    responses = rng.integers(lowest, highest + 1, size=(n, 8))
    data = pd.DataFrame(responses, columns=PAQ_IDS)

    if incl_iso_coords:
        data = add_iso_coords(data, val_range=val_range, **coord_kwargs)

    logger.info(f"Generated simulated PAQ data with {n} samples")
    return data

ssm_metrics

ssm_metrics(
    df: DataFrame,
    paq_cols: list[str] = PAQ_IDS,
    method: str = "cosine",
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> pd.DataFrame

Calculate the Structural Summary Method (SSM) metrics for each response.

PARAMETER DESCRIPTION
df

DataFrame containing PAQ data

TYPE: DataFrame

paq_cols

List of PAQ column names, by default PAQ_IDS

TYPE: list[str] DEFAULT: PAQ_IDS

method

Method to calculate SSM metrics, either "cosine" or "polar", by default "cosine"

TYPE: str DEFAULT: 'cosine'

val_range

Range of values for PAQ responses, by default (5, 1)

TYPE: tuple[int, int] DEFAULT: (5, 1)

angles

Angles for each PAQ in degrees, by default EQUAL_ANGLES

TYPE: tuple[int, ...] DEFAULT: EQUAL_ANGLES

RETURNS DESCRIPTION
DataFrame

DataFrame containing the SSM metrics

RAISES DESCRIPTION
ValueError

If PAQ columns are not present in the DataFrame or if an invalid method is specified

Examples:

>>>
>>> import pandas as pd
>>> data = pd.DataFrame({
...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
... })
>>> ssm_metrics(data).round(2)
   amplitude   angle  elevation  displacement  r_squared
0       0.68  263.82      10.57         -7.57       0.15
1       1.21   20.63       0.01          3.11       0.39
Source code in src/soundscapy/surveys/processing.py
def ssm_metrics(
    df: pd.DataFrame,
    paq_cols: list[str] = PAQ_IDS,
    method: str = "cosine",
    val_range: tuple[int, int] = (5, 1),
    angles: tuple[int, ...] = EQUAL_ANGLES,
) -> pd.DataFrame:
    """
    Calculate the Structural Summary Method (SSM) metrics for each response.

    Two methods are available:

    - ``"cosine"`` fits a cosine curve to every row with `ssm_cosine_fit`
      and expands the resulting metrics into columns.
    - ``"polar"`` converts the ISO coordinates of each row to polar form for
      amplitude and angle, uses the row mean (divided by the width of
      `val_range`, unless `val_range` is ``(0, 1)``) as elevation, and sets
      displacement to 0 and r_squared to 1.

    A `PendingDeprecationWarning` is emitted on every call.

    Parameters
    ----------
    df
        DataFrame containing PAQ data
    paq_cols
        List of PAQ column names, by default PAQ_IDS
    method
        Method to calculate SSM metrics, either "cosine" or "polar", by default "cosine"
    val_range
        Range of values for PAQ responses, by default (5, 1)
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES

    Returns
    -------
    :
        DataFrame containing the SSM metrics

    Raises
    ------
    ValueError
        If PAQ columns are not present in the DataFrame
        or if an invalid method is specified

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({
    ...     'PAQ1': [4, 2], 'PAQ2': [3, 5], 'PAQ3': [2, 4], 'PAQ4': [1, 3],
    ...     'PAQ5': [5, 1], 'PAQ6': [3, 2], 'PAQ7': [4, 3], 'PAQ8': [2, 5]
    ... })
    >>> ssm_metrics(data).round(2)  # doctest: +SKIP
       amplitude   angle  elevation  displacement  r_squared
    0       0.68  263.82      10.57         -7.57       0.15
    1       1.21   20.63       0.01          3.11       0.39

    """
    # TODO(MitchellAcoustics): Replace with a call to circumplex package
    warnings.warn(
        "This function is not yet fully implemented. "
        "See https://github.com/MitchellAcoustics/circumplex for a "
        "more complete implementation.",
        PendingDeprecationWarning,
        stacklevel=2,
    )

    # Column presence is checked before the method name, so a missing column
    # is reported first when both arguments are wrong.
    if not set(paq_cols).issubset(df.columns):
        msg = f"PAQ columns {paq_cols} not present in DataFrame"
        raise ValueError(msg)

    if method == "polar":
        iso_pleasant, iso_eventful = calculate_iso_coords(
            df[paq_cols], val_range, angles
        )
        r, theta = _convert_to_polar_coords(
            iso_pleasant.to_numpy(), iso_eventful.to_numpy()
        )
        mean = df[paq_cols].mean(axis=1)
        # Rescale the row mean by the width of the response scale; data
        # declared as already being on (0, 1) is left untouched.
        mean = mean / (max(val_range) - min(val_range)) if val_range != (0, 1) else mean

        return pd.DataFrame(
            {
                "amplitude": r,
                "angle": theta,
                "elevation": mean,
                "displacement": 0,  # Displacement is always 0 for polar method
                "r_squared": 1,  # R-squared is always 1 for polar method
            }
        )
    if method == "cosine":
        # One cosine fit per row; `table()` output is expanded into columns.
        return df[paq_cols].apply(
            lambda y: ssm_cosine_fit(y, angles).table(),
            axis=1,
            result_type="expand",
        )
    msg = "Method must be either 'polar' or 'cosine'"
    raise ValueError(msg)

ssm_cosine_fit

ssm_cosine_fit(
    y: Series,
    angles: tuple[int, ...] | ndarray = EQUAL_ANGLES,
    bounds: tuple[list[float], list[float]] = (
        [0, 0, 0, -np.inf],
        [np.inf, 360, np.inf, np.inf],
    ),
) -> SSMMetrics

Fit a cosine model to the PAQ data for SSM analysis.

PARAMETER DESCRIPTION
y

Series of PAQ values

TYPE: Series

angles

Angles for each PAQ in degrees, by default EQUAL_ANGLES

TYPE: tuple[int, ...] | ndarray DEFAULT: EQUAL_ANGLES

bounds

Bounds for the optimization parameters, by default ([0, 0, 0, -np.inf], [np.inf, 360, np.inf, np.inf])

TYPE: tuple[list[float], list[float]] DEFAULT: ([0, 0, 0, -inf], [inf, 360, inf, inf])

RETURNS DESCRIPTION
SSMMetrics

Calculated SSM metrics

Examples:

>>>
>>> import pandas as pd
>>> y = pd.Series([4, 3, 2, 1, 5, 3, 4, 2])
>>> metrics = ssm_cosine_fit(y)
>>> [round(v, 2) if isinstance(v, float) else v for v in metrics.table()]
[0.68, 263.82, 10.57, -7.57, 0.15]
Source code in src/soundscapy/surveys/processing.py
def ssm_cosine_fit(
    y: pd.Series,
    angles: tuple[int, ...] | np.ndarray = EQUAL_ANGLES,
    bounds: tuple[list[float], list[float]] = (
        [0, 0, 0, -np.inf],
        [np.inf, 360, np.inf, np.inf],
    ),
) -> SSMMetrics:
    """
    Fit a cosine model to the PAQ data for SSM analysis.

    The fitted model is ``elev + amp * cos(radians(theta - delta)) + dev``,
    with parameters estimated by `scipy.optimize.curve_fit` under `bounds`.
    A `PendingDeprecationWarning` is emitted on every call.

    Parameters
    ----------
    y
        Series of PAQ values
    angles
        Angles for each PAQ in degrees, by default EQUAL_ANGLES.
        Any sequence accepted by `numpy.asarray` may be given.
    bounds
        Lower and upper bounds for the parameters, in the order
        (amplitude, angle, elevation, displacement),
        by default ([0, 0, 0, -np.inf], [np.inf, 360, np.inf, np.inf])

    Returns
    -------
    :
        Calculated SSM metrics

    Examples
    --------
    >>> import pandas as pd
    >>> y = pd.Series([4, 3, 2, 1, 5, 3, 4, 2])
    >>> metrics = ssm_cosine_fit(y)  # doctest: +SKIP
    >>> [round(v, 2) if isinstance(v, float) else v for v in metrics.table()]  # doctest: +SKIP
    [0.68, 263.82, 10.57, -7.57, 0.15]

    """  # noqa: E501
    warnings.warn(
        "This function is not yet fully implemented. "
        "See https://github.com/MitchellAcoustics/circumplex "
        "for a more complete implementation.",
        PendingDeprecationWarning,
        stacklevel=2,
    )

    def _cosine_model(
        theta: np.ndarray, amp: float, delta: float, elev: float, dev: float
    ) -> np.ndarray:
        # NOTE(review): `elev` and `dev` are both additive offsets here, so
        # the fit can only determine their sum - confirm this is intended.
        return elev + amp * np.cos(np.radians(theta - delta)) + dev

    # Convert once, up front, so lists work as well as tuples and arrays; the
    # model subtracts a float from `theta`, which a plain list cannot do.
    angle_values = np.asarray(angles, dtype=float)

    param, _ = optimize.curve_fit(
        _cosine_model,
        xdata=angle_values,
        ydata=y,
        bounds=bounds,
    )
    amp, delta, elev, dev = param
    r_squared = _r2_score(y.to_numpy(), _cosine_model(angle_values, *param))

    return SSMMetrics(
        amplitude=amp,
        angle=delta,
        elevation=elev,
        displacement=dev,
        r_squared=r_squared,
    )

ipsatize

ipsatize(
    data: DataFrame,
    method: Literal[
        "grand_mean", "column_wise", "row_wise"
    ] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame

Participant-level ipsatization for circumplex analysis.

Removes systematic response biases before computing a correlation matrix. The choice of method depends on the study design and the type of bias being corrected.

PARAMETER DESCRIPTION
data

DataFrame containing PAQ scale columns and (for participant-level methods) a grouping column.

TYPE: DataFrame

method

Centering strategy:

"grand_mean" (default) — one scalar per participant: the mean across all PAQ values and all observations for that participant. Removes overall response-level differences between participants. Matches the published SATP analysis (Aletta et al., 2024) and the original R implementation.

"column_wise" — eight scalars per participant: the per-scale mean across that participant's observations. Removes scale-specific response biases. This is the behaviour of the legacy :func:person_center function.

"row_wise" — one scalar per observation: the mean across all PAQ scales within that observation. Removes the general impression of each individual soundscape stimulus. Equivalent to circumplex.ipsatize().

TYPE: Literal['grand_mean', 'column_wise', 'row_wise'] DEFAULT: 'grand_mean'

participant_col

Column used to group observations by participant. Required for "grand_mean" and "column_wise"; ignored for "row_wise".

TYPE: str DEFAULT: 'participant'

scales

PAQ column names to centre. Defaults to :data:PAQ_IDS when None.

TYPE: list[str] | None DEFAULT: None

RETURNS DESCRIPTION
DataFrame

DataFrame containing only the scale columns with centred values. The participant_col grouping column is excluded from the result.

RAISES DESCRIPTION
KeyError

If participant_col is not present in data when method is "grand_mean" or "column_wise".

Examples:

>>> import pandas as pd
>>> data = pd.DataFrame({
...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
...     'participant': ['A', 'A', 'B', 'B'],
... })
>>> result = ipsatize(data, method="grand_mean")
>>> result['PAQ1'].tolist()
[-5.0, 5.0, 5.0, -5.0]
Source code in src/soundscapy/surveys/processing.py
def ipsatize(
    data: pd.DataFrame,
    method: Literal["grand_mean", "column_wise", "row_wise"] = "grand_mean",
    participant_col: str = "participant",
    scales: list[str] | None = None,
) -> pd.DataFrame:
    """
    Participant-level ipsatization for circumplex analysis.

    Removes systematic response biases before computing a correlation matrix.
    The choice of method depends on the study design and the type of bias
    being corrected.

    Parameters
    ----------
    data
        DataFrame containing PAQ scale columns and (for participant-level
        methods) a grouping column.
    method
        Centering strategy:

        ``"grand_mean"`` *(default)* — one scalar per participant: the mean
        across *all* PAQ values and *all* observations for that participant.
        Removes overall response-level differences between participants.
        **Matches the published SATP analysis (Aletta et al., 2024) and the
        original R implementation.**

        ``"column_wise"`` — eight scalars per participant: the per-scale mean
        across that participant's observations.  Removes scale-specific
        response biases.  This is the behaviour of the legacy
        :func:`person_center` function.

        ``"row_wise"`` — one scalar per observation: the mean across all PAQ
        scales within that observation.  Removes the general impression of
        each individual soundscape stimulus.  Equivalent to
        ``circumplex.ipsatize()``.
    participant_col
        Column used to group observations by participant.  Required for
        ``"grand_mean"`` and ``"column_wise"``; ignored for ``"row_wise"``.
    scales
        PAQ column names to centre.  Defaults to :data:`PAQ_IDS` when
        ``None``.

    Returns
    -------
    :
        DataFrame containing only the scale columns with centred values.
        The ``participant_col`` grouping column is excluded from the result.

    Raises
    ------
    KeyError
        If ``participant_col`` is not present in ``data`` when
        ``method`` is ``"grand_mean"`` or ``"column_wise"``.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({
    ...     'PAQ1': [50., 60., 40., 30.], 'PAQ2': [50., 60., 40., 30.],
    ...     'PAQ3': [50., 60., 40., 30.], 'PAQ4': [50., 60., 40., 30.],
    ...     'PAQ5': [50., 60., 40., 30.], 'PAQ6': [50., 60., 40., 30.],
    ...     'PAQ7': [50., 60., 40., 30.], 'PAQ8': [50., 60., 40., 30.],
    ...     'participant': ['A', 'A', 'B', 'B'],
    ... })
    >>> result = ipsatize(data, method="grand_mean")
    >>> result['PAQ1'].tolist()
    [-5.0, 5.0, 5.0, -5.0]

    """
    _scales = scales if scales is not None else PAQ_IDS

    if method == "column_wise":
        means = data.groupby(participant_col)[_scales].transform("mean")
        return data[_scales] - means

    if method == "grand_mean":
        # Compute a single scalar per participant: mean across all PAQ values
        # and all observations for that participant.  Use nanmean so that
        # participants with partial NaN data still get a valid grand mean
        # computed from their non-NaN values; NaN rows are then removed by
        # downstream listwise deletion rather than silently expanding data loss
        # to the whole participant.
        grand_means = data.groupby(participant_col)[_scales].apply(
            lambda df: float(np.nanmean(df.values))
        )
        grand_mean_per_row = data[participant_col].map(grand_means)
        return data[_scales].subtract(grand_mean_per_row, axis=0)

    if method == "row_wise":
        row_means = data[_scales].mean(axis=1)
        return data[_scales].sub(row_means, axis=0)

    msg = f"method must be 'grand_mean', 'column_wise', or 'row_wise'; got {method!r}"
    raise ValueError(msg)

Survey utilities

soundscapy.surveys.survey_utils

Core utility functions for processing soundscape survey data.

This module contains fundamental functions and constants used across the soundscapy package for handling and analyzing soundscape survey data.

CLASS DESCRIPTION
PAQ

Enumeration of Perceptual Attribute Questions (PAQ) names and IDs.

PAQDfSchema

Pandera schema for validating PAQ (Perceptual Attribute Questions) DataFrames.

LikertScale

Contains different Likert scale options for survey questions.

FUNCTION DESCRIPTION
return_paqs

Return only the PAQ columns from a DataFrame.

rename_paqs

Rename the PAQ columns in a DataFrame to standard PAQ IDs.

mean_responses

Calculate the mean responses for each PAQ group.

PAQ

PAQ(label: str, id: str)

Bases: Enum

Enumeration of Perceptual Attribute Questions (PAQ) names and IDs.

Initialize a PAQ enum member.

PARAMETER DESCRIPTION
label

The descriptive label for the PAQ (e.g., 'pleasant').

TYPE: str

id

The standard identifier for the PAQ (e.g., 'PAQ1').

TYPE: str

Source code in src/soundscapy/surveys/survey_utils.py
def __init__(self, label: str, id: str) -> None:  # noqa: A002
    """
    Store the label and identifier on a PAQ enum member.

    Parameters
    ----------
    label
        The descriptive label for the PAQ (e.g., 'pleasant').
    id
        The standard identifier for the PAQ (e.g., 'PAQ1').

    """
    # `id` shadows the builtin on purpose (see the noqa above); both values
    # are kept as plain attributes on the member.
    self.id = id
    self.label = label

PAQDfSchema

Bases: DataFrameModel

Pandera schema for validating PAQ (Perceptual Attribute Questions) DataFrames.

This schema defines the expected structure and data types for DataFrames containing soundscape survey data with PAQ responses and associated metadata. It includes automatic column name coercion to standardize various input formats.

ATTRIBUTE DESCRIPTION
PAQ1-PAQ8

Perceptual Attribute Question responses (1-8) on a Likert scale. Nullable to allow for missing responses.

TYPE: Series[float]

language

Language code for the survey responses. Optional field.

TYPE: Series[str] | None

location_id

Identifier for the survey location. Optional field.

TYPE: Series[str] | None

session_id

Identifier for the survey session. Optional field.

TYPE: Series[str] | None

group_id

Identifier for the survey group. Optional field.

TYPE: Series[str] | None

record_id

Unique identifier for each survey record. Optional field.

TYPE: Series[str] | None

METHOD DESCRIPTION
column_name_coercion

Coerce column names to standardized format for PAQ data.

column_name_coercion
column_name_coercion(df: DataFrame) -> DataFrame

Coerce column names to standardized format for PAQ data.

This parser automatically renames columns to match the expected schema:

  • PAQ label names (e.g., 'pleasant') to PAQ IDs (e.g., 'PAQ1')
  • Legacy ID column names to lowercase snake_case format
PARAMETER DESCRIPTION
cls

The schema class (automatically passed by pandera).

df

Input DataFrame with potentially non-standard column names.

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

DataFrame with standardized column names.

Source code in src/soundscapy/surveys/survey_utils.py
@pa.dataframe_parser
def column_name_coercion(cls, df: DataFrame) -> DataFrame:  # noqa: N805
    """
    Rename incoming columns to the names the PAQ schema expects.

    Two kinds of columns are renamed; all others are left as they are:

    - PAQ label names (e.g., 'pleasant') become PAQ IDs (e.g., 'PAQ1')
    - Legacy ID column names become lowercase snake_case names

    Parameters
    ----------
    cls
        The schema class (automatically passed by pandera).
    df
        Input DataFrame with potentially non-standard column names.

    Returns
    -------
    :
        DataFrame with standardized column names.

    """
    # Legacy CamelCase identifier columns and their snake_case replacements.
    legacy_id_names = {
        "LocationID": "location_id",
        "SessionID": "session_id",
        "GroupID": "group_id",
        "RecordID": "record_id",
    }
    # PAQ labels paired positionally with the standard PAQ IDs.
    label_to_id = dict(zip(PAQ_LABELS, PAQ_IDS, strict=False))

    return df.rename(columns=label_to_id | legacy_id_names)

LikertScale dataclass

LikertScale(
    paq: Scale = (
        lambda: [
            "Strongly disagree",
            "Somewhat disagree",
            "Neutral",
            "Somewhat agree",
            "Strongly agree",
        ]
    )(),
    source: Scale = (
        lambda: [
            "Not at all",
            "A little",
            "Moderately",
            "A lot",
            "Dominates completely",
        ]
    )(),
    overall: Scale = (
        lambda: [
            "Very bad",
            "Bad",
            "Neither bad nor good",
            "Good",
            "Very good",
        ]
    )(),
    appropriate: Scale = (
        lambda: [
            "Not at all",
            "A little",
            "Moderately",
            "A lot",
            "Perfectly",
        ]
    )(),
    loud: Scale = (
        lambda: [
            "Not at all",
            "A little",
            "Moderately",
            "Very",
            "Extremely",
        ]
    )(),
    often: Scale = (
        lambda: [
            "Never / This is my first time here",
            "Rarely",
            "Sometimes",
            "Often",
            "Very often",
        ]
    )(),
    visit: Scale = (
        lambda: [
            "Never",
            "Rarely",
            "Sometimes",
            "Often",
            "Very often",
        ]
    )(),
)

Contains different Likert scale options for survey questions.

This class provides standardized 5-point Likert scales for questions commonly used in acoustic and soundscape surveys.

ATTRIBUTE DESCRIPTION
PAQ

Agreement scale from "Strongly disagree" to "Strongly agree"

SOURCE

Source perception scale from "Not at all" to "Dominates completely"

OVERALL

Quality assessment scale from "Very bad" to "Very good"

APPROPRIATE

Appropriateness scale from "Not at all" to "Perfectly"

LOUD

Loudness perception scale from "Not at all" to "Extremely"

OFTEN

Frequency scale with first-time option from "Never / This is my first time here" to "Very often"

VISIT

Standard frequency scale from "Never" to "Very often"

return_paqs

return_paqs(
    df: DataFrame,
    other_cols: list[str] | None = None,
    *,
    incl_ids: bool = True,
) -> pd.DataFrame

Return only the PAQ columns from a DataFrame.

PARAMETER DESCRIPTION
df

Input DataFrame containing PAQ data.

TYPE: DataFrame

other_cols

Other columns to include in the output, by default None.

TYPE: list[str] | None DEFAULT: None

incl_ids

Whether to include ID columns (RecordID, GroupID, etc.), by default True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
DataFrame

DataFrame containing only the PAQ columns and optionally ID and other specified columns.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'RecordID': [1, 2],
...     'PAQ1': [4, 3],
...     'PAQ2': [2, 5],
...     'PAQ3': [1, 2],
...     'PAQ4': [3, 4],
...     'PAQ5': [5, 1],
...     'PAQ6': [2, 3],
...     'PAQ7': [4, 5],
...     'PAQ8': [1, 2],
...     'OtherCol': ['A', 'B']
... })
>>> return_paqs(df)
   RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
0         1     4     2     1     3     5     2     4     1
1         2     3     5     2     4     1     3     5     2
>>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
   PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
0     4     2     1     3     5     2     4     1        A
1     3     5     2     4     1     3     5     2        B
Source code in src/soundscapy/surveys/survey_utils.py
def return_paqs(
    df: pd.DataFrame, other_cols: list[str] | None = None, *, incl_ids: bool = True
) -> pd.DataFrame:
    """
    Select the PAQ columns (plus optional extras) from a DataFrame.

    The output columns are ordered as: any ID columns found in `df`, then
    the PAQ ID columns, then `other_cols`.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    other_cols
        Other columns to include in the output, by default None.
    incl_ids
        Whether to include those of the ID columns RecordID, GroupID,
        SessionID and LocationID that exist in `df`, by default True.

    Returns
    -------
    :
        DataFrame containing only the PAQ columns and optionally ID and other specified
        columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'RecordID': [1, 2],
    ...     'PAQ1': [4, 3],
    ...     'PAQ2': [2, 5],
    ...     'PAQ3': [1, 2],
    ...     'PAQ4': [3, 4],
    ...     'PAQ5': [5, 1],
    ...     'PAQ6': [2, 3],
    ...     'PAQ7': [4, 5],
    ...     'PAQ8': [1, 2],
    ...     'OtherCol': ['A', 'B']
    ... })
    >>> return_paqs(df)
       RecordID  PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8
    0         1     4     2     1     3     5     2     4     1
    1         2     3     5     2     4     1     3     5     2
    >>> return_paqs(df, incl_ids=False, other_cols=['OtherCol'])
       PAQ1  PAQ2  PAQ3  PAQ4  PAQ5  PAQ6  PAQ7  PAQ8 OtherCol
    0     4     2     1     3     5     2     4     1        A
    1     3     5     2     4     1     3     5     2        B

    """
    known_id_names = ("RecordID", "GroupID", "SessionID", "LocationID")

    # Only ID columns that actually exist are requested, so a frame without
    # them does not fail the selection below.
    present_ids = (
        [id_name for id_name in known_id_names if id_name in df.columns]
        if incl_ids
        else []
    )
    extras = list(other_cols) if other_cols else []
    cols = [*present_ids, *PAQ_IDS, *extras]

    logger.debug(f"Returning PAQ columns: {cols}")
    return df[cols]

rename_paqs

rename_paqs(
    df: DataFrame,
    paq_aliases: list | tuple | dict | None = None,
) -> pd.DataFrame

Rename the PAQ columns in a DataFrame to standard PAQ IDs.

PARAMETER DESCRIPTION
df

Input DataFrame containing PAQ data.

TYPE: DataFrame

paq_aliases

Specify which PAQs are to be renamed. If None, will check if the column names are in pre-defined options. If a tuple, the order must match PAQ_IDS. If a dict, keys are current names and values are desired PAQ IDs.

TYPE: list | tuple | dict | None DEFAULT: None

RETURNS DESCRIPTION
DataFrame

DataFrame with renamed PAQ columns.

RAISES DESCRIPTION
TypeError

If paq_aliases is not a tuple, list, or dictionary.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({
...     'pleasant': [4, 3],
...     'vibrant': [2, 5],
...     'other_col': [1, 2]
... })
>>> rename_paqs(df)
   PAQ1  PAQ2  other_col
0     4     2          1
1     3     5          2
>>> df_custom = pd.DataFrame({
...     'pl': [4, 3],
...     'vb': [2, 5],
... })
>>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
   PAQ1  PAQ2
0     4     2
1     3     5
Source code in src/soundscapy/surveys/survey_utils.py
def rename_paqs(
    df: pd.DataFrame, paq_aliases: list | tuple | dict | None = None
) -> pd.DataFrame:
    """
    Rename the PAQ columns in a DataFrame to standard PAQ IDs.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    paq_aliases
        Specify which PAQs are to be renamed. If None, will check if the column names
        are in pre-defined options (``PAQ_IDS`` first, then ``PAQ_LABELS``).
        If a list or tuple, the order must match PAQ_IDS.
        If a dict, keys are current names and values are desired PAQ IDs.

    Returns
    -------
    :
        DataFrame with renamed PAQ columns. When `paq_aliases` is None and at
        least one column is already a standard PAQ ID, the input DataFrame
        itself is returned unchanged.

    Raises
    ------
    TypeError
        If paq_aliases is not a tuple, list, or dictionary, or if it is None
        and none of the pre-defined PAQ names are found in the columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'pleasant': [4, 3],
    ...     'vibrant': [2, 5],
    ...     'other_col': [1, 2]
    ... })
    >>> rename_paqs(df)
       PAQ1  PAQ2  other_col
    0     4     2          1
    1     3     5          2
    >>> df_custom = pd.DataFrame({
    ...     'pl': [4, 3],
    ...     'vb': [2, 5],
    ... })
    >>> rename_paqs(df_custom, paq_aliases={'pl': 'PAQ1', 'vb': 'PAQ2'})
       PAQ1  PAQ2
    0     4     2
    1     3     5

    """
    if paq_aliases is None:
        # Auto-detection: nothing to do if any standard PAQ ID is already present.
        if any(paq_id in df.columns for paq_id in PAQ_IDS):
            logger.info("PAQs already correctly named.")
            return df
        if any(paq_name in df.columns for paq_name in PAQ_LABELS):
            paq_aliases = PAQ_LABELS
        else:
            # Same exception type as before, and the message keeps the original
            # text as its prefix, but now explains why the default None failed.
            msg = (
                "paq_aliases must be a tuple, list, or dictionary. "
                "None was given and no pre-defined PAQ names were found "
                "in the DataFrame columns."
            )
            raise TypeError(msg)

    if isinstance(paq_aliases, list | tuple):
        # Positional mapping: the i-th alias is renamed to the i-th PAQ ID.
        # strict=False lets a shorter alias sequence map only the leading IDs.
        rename_dict = dict(zip(paq_aliases, PAQ_IDS, strict=False))
    elif isinstance(paq_aliases, dict):
        rename_dict = paq_aliases
    else:
        msg = "paq_aliases must be a tuple, list, or dictionary."
        raise TypeError(msg)

    logger.debug(f"Renaming PAQs with the following mapping: {rename_dict}")
    return df.rename(columns=rename_dict)

mean_responses

mean_responses(df: DataFrame, group: str) -> pd.DataFrame

Calculate the mean responses for each PAQ group.

PARAMETER DESCRIPTION
df

Input DataFrame containing PAQ data.

TYPE: DataFrame

group

Column name to group by.

TYPE: str

RETURNS DESCRIPTION
DataFrame

DataFrame with mean responses for each PAQ group.

Source code in src/soundscapy/surveys/survey_utils.py
def mean_responses(df: pd.DataFrame, group: str) -> pd.DataFrame:
    """
    Average the PAQ responses within each group.

    Parameters
    ----------
    df
        Input DataFrame containing PAQ data.
    group
        Name of the column whose values define the groups.

    Returns
    -------
    :
        DataFrame of per-group means, with the grouping column restored as a
        regular column rather than the index.

    """
    # Keep only the PAQ columns plus the grouping column; ID columns are dropped.
    paq_with_group = return_paqs(df, incl_ids=False, other_cols=[group])
    group_means = paq_with_group.groupby(group).mean()
    return group_means.reset_index()