diff --git a/docs/reference/databases.md b/docs/reference/databases.md index 7024a15..cad516c 100644 --- a/docs/reference/databases.md +++ b/docs/reference/databases.md @@ -9,3 +9,9 @@ filters: ["!ISDAccessor", "!_"] ## Soundscape Attributes Translation Project (SATP) ::: soundscapy.databases.satp + +## SATP Translation Testing + +::: soundscapy.databases.satp_testing +options: +filters: ["!_"] diff --git a/src/soundscapy/databases/__init__.py b/src/soundscapy/databases/__init__.py index f5310b2..995dedf 100644 --- a/src/soundscapy/databases/__init__.py +++ b/src/soundscapy/databases/__init__.py @@ -2,9 +2,10 @@ Soundscapy Databases Module. This module handles connections to and operations on soundscape databases, -primarily focused on the International Soundscape Database (ISD). +primarily focused on the International Soundscape Database (ISD) and the +Soundscape Attributes Translation Project (SATP). """ -from soundscapy.databases import isd, satp +from soundscapy.databases import isd, satp, satp_testing -__all__ = ["isd", "satp"] +__all__ = ["isd", "satp", "satp_testing"] diff --git a/src/soundscapy/databases/satp_testing.py b/src/soundscapy/databases/satp_testing.py new file mode 100644 index 0000000..1dfefca --- /dev/null +++ b/src/soundscapy/databases/satp_testing.py @@ -0,0 +1,500 @@ +""" +Module for translation testing of soundscape attributes based on SATP methodology. + +This module provides functions for computing translation quality metrics and +statistical tests for validating translations of soundscape attributes. It is +based on the methodology from the Soundscape Attributes Translation Project (SATP). 
def compute_main_axis_criteria(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute translation quality criteria for main axis attributes.

    Main axes are the primary soundscape dimensions (e.g., pleasant-annoying,
    eventful-uneventful) that have antonymic relationships.

    Parameters
    ----------
    df : pd.DataFrame
        Raw survey responses. The following columns are required, each
        rated on a 0-10 scale:

        - APPR: Appropriateness rating
        - UNDR: Understandability rating
        - ANTO: Antonymy rating
        - BIAS: Bias rating
        - ASSOCW: Association with word
        - ASSOCCW: Association with counter-word
        - IMPCW: Importance with word
        - IMPCCW: Importance with counter-word

        Any other column (e.g., COUNTRY, CANDIDATE) is carried over untouched.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` (the input frame is not modified) in which:

        - APPR, UNDR, ANTO are rescaled to the 0-1 range
        - CLAR, ORTH, NCON, IBAL are added as new columns

        The raw BIAS, ASSOCW, ASSOCCW, IMPCW and IMPCCW columns are kept
        as-is, on their original 0-10 scale.

    Raises
    ------
    KeyError
        If one or more of the required columns is missing from ``df``.

    Notes
    -----
    The criteria are computed as follows:

    - APPR, UNDR, ANTO: value / 10
    - CLAR: 1 - 0.5*(ASSOCW/10) - 0.5*(ASSOCCW/10)
    - ORTH: 1 - 2*|BIAS/10 - 0.5|
    - NCON: 1 - 0.5*(IMPCW/10 + IMPCCW/10)
    - IBAL: 1 - |IMPCCW/10 - IMPCW/10|

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'APPR': [8.0, 9.0],
    ...     'UNDR': [7.5, 8.5],
    ...     'ANTO': [8.0, 7.0],
    ...     'BIAS': [5.0, 4.5],
    ...     'ASSOCCW': [6.0, 5.5],
    ...     'IMPCCW': [4.0, 3.5],
    ...     'ASSOCW': [7.0, 6.5],
    ...     'IMPCW': [5.0, 4.5],
    ...     'CANDIDATE': ['pleasant', 'pleasant']
    ... })
    >>> result = compute_main_axis_criteria(df)
    >>> float(result['APPR'].iloc[0])
    0.8
    >>> round(float(result['CLAR'].iloc[0]), 2)
    0.35
    >>> float(result['ORTH'].iloc[0])
    1.0

    """
    required = (
        "APPR", "UNDR", "ANTO", "BIAS", "ASSOCW", "ASSOCCW", "IMPCW", "IMPCCW",
    )
    # Fail up front with the full list instead of a bare KeyError('X') raised
    # half-way through the computation.
    missing = [column for column in required if column not in df.columns]
    if missing:
        msg = f"Missing required column(s) for main axis criteria: {missing}"
        raise KeyError(msg)

    # Work on a copy so the caller's frame keeps its raw 0-10 ratings.
    result = df.copy()

    # Normalize 0-10 scale to 0-1 scale
    for column in ("APPR", "UNDR", "ANTO"):
        result[column] = result[column] / 10

    # Compute derived criteria
    result["CLAR"] = 1 - 0.5 * result["ASSOCW"] / 10 - 0.5 * result["ASSOCCW"] / 10
    # A BIAS of 5 (scale midpoint) maps to ORTH = 1; either extreme maps to 0.
    result["ORTH"] = 1 - 2 * np.abs(result["BIAS"] / 10 - 0.5)
    result["NCON"] = 1 - 0.5 * (result["IMPCW"] / 10 + result["IMPCCW"] / 10)
    result["IBAL"] = 1 - np.abs(result["IMPCCW"] / 10 - result["IMPCW"] / 10)

    logger.debug("Computed main axis criteria for dataset")
    return result
def compute_derived_axis_criteria(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute translation quality criteria for derived axis attributes.

    Derived axes are secondary soundscape dimensions (e.g., vibrant, calm,
    monotonous, chaotic) that do not necessarily have direct antonyms.

    Parameters
    ----------
    df : pd.DataFrame
        Raw survey responses. The following columns are required, each
        rated on a 0-10 scale:

        - APPR: Appropriateness rating
        - UNDR: Understandability rating
        - ASSOCW: Association with word
        - ASSOCCW: Association with counter-word
        - IMPCW: Importance with word
        - IMPCCW: Importance with counter-word

        Any other column (e.g., COUNTRY, CANDIDATE) is carried over untouched.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` (the input frame is not modified) in which:

        - APPR, UNDR are rescaled to the 0-1 range
        - CLAR, CONN, IBAL are added as new columns

        The raw ASSOCW, ASSOCCW, IMPCW and IMPCCW columns are kept as-is,
        on their original 0-10 scale.

    Raises
    ------
    KeyError
        If one or more of the required columns is missing from ``df``.

    Notes
    -----
    The criteria are computed as follows:

    - APPR, UNDR: value / 10
    - CLAR: 1 - 0.5*(ASSOCW/10) - 0.5*(ASSOCCW/10)
    - CONN: 0.5*(IMPCW/10 + IMPCCW/10)
    - IBAL: 1 - |IMPCCW/10 - IMPCW/10|

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'APPR': [8.0, 9.0],
    ...     'UNDR': [7.5, 8.5],
    ...     'ASSOCCW': [6.0, 5.5],
    ...     'IMPCCW': [4.0, 3.5],
    ...     'ASSOCW': [7.0, 6.5],
    ...     'IMPCW': [5.0, 4.5],
    ...     'CANDIDATE': ['vibrant', 'vibrant']
    ... })
    >>> result = compute_derived_axis_criteria(df)
    >>> float(result['APPR'].iloc[0])
    0.8
    >>> round(float(result['CONN'].iloc[0]), 2)
    0.45

    """
    required = ("APPR", "UNDR", "ASSOCW", "ASSOCCW", "IMPCW", "IMPCCW")
    # Fail up front with the full list instead of a bare KeyError('X') raised
    # half-way through the computation.
    missing = [column for column in required if column not in df.columns]
    if missing:
        msg = f"Missing required column(s) for derived axis criteria: {missing}"
        raise KeyError(msg)

    # Work on a copy so the caller's frame keeps its raw 0-10 ratings.
    result = df.copy()

    # Normalize 0-10 scale to 0-1 scale
    for column in ("APPR", "UNDR"):
        result[column] = result[column] / 10

    # Compute derived criteria
    result["CLAR"] = 1 - 0.5 * result["ASSOCW"] / 10 - 0.5 * result["ASSOCCW"] / 10
    result["CONN"] = 0.5 * (result["IMPCW"] / 10 + result["IMPCCW"] / 10)
    result["IBAL"] = 1 - np.abs(result["IMPCCW"] / 10 - result["IMPCW"] / 10)

    logger.debug("Computed derived axis criteria for dataset")
    return result
def summarize_main_axis(
    df: pd.DataFrame,
    by_country: bool = False,  # noqa: FBT001, FBT002
) -> pd.DataFrame:
    """
    Average the main axis criteria for each candidate translation.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the columns APPR, UNDR, CLAR, ANTO, ORTH, NCON and
        IBAL plus CANDIDATE (and COUNTRY when ``by_country`` is set),
        typically the output of compute_main_axis_criteria.
    by_country : bool, optional
        When True the means are taken per (COUNTRY, CANDIDATE) pair;
        otherwise per CANDIDATE only. Default is False.

    Returns
    -------
    pd.DataFrame
        One row per group with the group key(s) as leading column(s),
        followed by the mean of each criterion.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'CANDIDATE': ['pleasant', 'pleasant', 'annoying', 'annoying'],
    ...     'COUNTRY': ['SG', 'MY', 'SG', 'MY'],
    ...     'APPR': [0.8, 0.85, 0.75, 0.8],
    ...     'UNDR': [0.75, 0.8, 0.7, 0.75],
    ...     'CLAR': [0.6, 0.65, 0.55, 0.6],
    ...     'ANTO': [0.8, 0.85, 0.75, 0.8],
    ...     'ORTH': [0.9, 0.95, 0.85, 0.9],
    ...     'NCON': [0.7, 0.75, 0.65, 0.7],
    ...     'IBAL': [0.85, 0.9, 0.8, 0.85]
    ... })
    >>> result = summarize_main_axis(df, by_country=False)
    >>> len(result)
    2
    >>> 'APPR' in result.columns
    True

    """
    score_columns = ["APPR", "UNDR", "CLAR", "ANTO", "ORTH", "NCON", "IBAL"]
    group_keys = ["COUNTRY", "CANDIDATE"] if by_country else "CANDIDATE"

    per_group_means = df.groupby(group_keys)[score_columns].mean()
    # Turn the group key(s) back into ordinary columns.
    summary = per_group_means.reset_index()

    logger.debug(f"Summarized main axis data (by_country={by_country})")
    return summary
def summarize_derived_axis(
    df: pd.DataFrame,
    by_country: bool = False,  # noqa: FBT001, FBT002
) -> pd.DataFrame:
    """
    Average the derived axis criteria for each candidate translation.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the columns APPR, UNDR, CLAR, CONN and IBAL plus
        CANDIDATE (and COUNTRY when ``by_country`` is set), typically the
        output of compute_derived_axis_criteria.
    by_country : bool, optional
        When True the means are taken per (COUNTRY, CANDIDATE) pair;
        otherwise per CANDIDATE only. Default is False.

    Returns
    -------
    pd.DataFrame
        One row per group with the group key(s) as leading column(s),
        followed by the mean of each criterion.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'CANDIDATE': ['vibrant', 'vibrant', 'calm', 'calm'],
    ...     'COUNTRY': ['SG', 'MY', 'SG', 'MY'],
    ...     'APPR': [0.8, 0.85, 0.75, 0.8],
    ...     'UNDR': [0.75, 0.8, 0.7, 0.75],
    ...     'CLAR': [0.6, 0.65, 0.55, 0.6],
    ...     'CONN': [0.7, 0.75, 0.65, 0.7],
    ...     'IBAL': [0.85, 0.9, 0.8, 0.85]
    ... })
    >>> result = summarize_derived_axis(df, by_country=False)
    >>> len(result)
    2
    >>> 'CONN' in result.columns
    True

    """
    score_columns = ["APPR", "UNDR", "CLAR", "CONN", "IBAL"]
    group_keys = ["COUNTRY", "CANDIDATE"] if by_country else "CANDIDATE"

    per_group_means = df.groupby(group_keys)[score_columns].mean()
    # Turn the group key(s) back into ordinary columns.
    summary = per_group_means.reset_index()

    logger.debug(f"Summarized derived axis data (by_country={by_country})")
    return summary
def kruskal_wallis_test(
    df: pd.DataFrame,
    axis_type: Literal["main", "derived"],
    independent_var: str = "CANDIDATE",
) -> pd.DataFrame:
    """
    Perform Kruskal-Wallis test for each criterion across groups.

    The Kruskal-Wallis test is a non-parametric test to determine if there
    are significant differences between groups. This is used to test if
    different translation candidates have significantly different quality scores.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with computed criteria. Must contain ``independent_var``
        and every criterion column of the selected ``axis_type``.
    axis_type : {"main", "derived"}
        Type of axis being tested. "main" tests APPR, UNDR, CLAR, ANTO,
        ORTH, NCON, IBAL; "derived" tests APPR, UNDR, CLAR, CONN, IBAL.
    independent_var : str, optional
        Column name to group by for the test. Default is "CANDIDATE".
        Rows whose value in this column is missing are left out by the
        grouping and therefore do not take part in the test.

    Returns
    -------
    pd.DataFrame
        One row per criterion with the columns:

        - CRITERION: Name of the criterion tested
        - statistic: Kruskal-Wallis H statistic
        - pvalue: p-value for the test
        - effect_size: Eta-squared based on H, ``(H - k + 1) / (n - k)``,
          with ``k`` groups and ``n`` observations across those groups.
          NaN when ``n <= k``, where the ratio is undefined.

    Raises
    ------
    ValueError
        If ``axis_type`` is neither "main" nor "derived".

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> rng = np.random.default_rng(42)
    >>> df = pd.DataFrame({
    ...     'CANDIDATE': ['A'] * 10 + ['B'] * 10 + ['C'] * 10,
    ...     'APPR': rng.uniform(0.5, 1.0, 30),
    ...     'UNDR': rng.uniform(0.5, 1.0, 30),
    ...     'CLAR': rng.uniform(0.4, 0.9, 30),
    ...     'CONN': rng.uniform(0.5, 0.9, 30),
    ...     'IBAL': rng.uniform(0.7, 1.0, 30)
    ... })
    >>> result = kruskal_wallis_test(df, axis_type="derived")
    >>> len(result) == 5  # One row per criterion (derived has 5 criteria)
    True
    >>> 'pvalue' in result.columns
    True

    """
    if axis_type == "main":
        criteria = ["APPR", "UNDR", "CLAR", "ANTO", "ORTH", "NCON", "IBAL"]
    elif axis_type == "derived":
        criteria = ["APPR", "UNDR", "CLAR", "CONN", "IBAL"]
    else:
        msg = "axis_type must be either 'main' or 'derived'"
        raise ValueError(msg)

    # The split by independent_var is the same for every criterion: do it once.
    grouped = df.groupby(independent_var)

    results = []
    for criterion in criteria:
        groups = [values.to_numpy() for _, values in grouped[criterion]]

        # Perform Kruskal-Wallis test
        statistic, pvalue = stats.kruskal(*groups)

        # Count only the observations that were actually passed to the test;
        # len(df) would also count rows dropped for a missing group label.
        n = sum(len(values) for values in groups)
        k = len(groups)
        # Eta-squared[H]; undefined when every group holds a single value.
        effect_size = (statistic - k + 1) / (n - k) if n > k else float("nan")

        results.append({
            "CRITERION": criterion,
            "statistic": statistic,
            "pvalue": pvalue,
            "effect_size": effect_size,
        })

    result_df = pd.DataFrame(results)
    logger.debug(f"Performed Kruskal-Wallis test for {axis_type} axis")
    return result_df
def mann_whitney_test(
    df: pd.DataFrame,
    criteria: list[str],
    paq_attribute: str,
    group_var: str = "COUNTRY",
) -> pd.DataFrame:
    """
    Perform Mann-Whitney-Wilcoxon test for each criterion and candidate.

    This test compares two groups (e.g., two countries) for each translation
    candidate to determine if there are significant differences in quality scores.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with computed criteria. Must contain a CANDIDATE column,
        ``group_var`` and every column named in ``criteria``.
    criteria : list[str]
        List of criteria column names to test.
    paq_attribute : str
        Name of the PAQ (Perceived Affective Quality) attribute being tested
        (e.g., "pleasant", "eventful"). Copied verbatim into the PAQ column.
    group_var : str, optional
        Column name for grouping variable (e.g., "COUNTRY"). Default is "COUNTRY".
        Missing values in this column are not counted as a group.

    Returns
    -------
    pd.DataFrame
        One row per (criterion, candidate) pair that has exactly two groups,
        ordered by criterion and then by candidate, with the columns:

        - PAQ: PAQ attribute name
        - CRITERION: Name of the criterion tested
        - CANDIDATE: Translation candidate
        - statistic: U statistic
        - pvalue: two-sided p-value for the test
        - adjusted_pvalue: ``min(2 * pvalue, 1.0)``

        Candidates without exactly two groups are skipped with a warning.
        The columns are present even when no row is produced.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> rng = np.random.default_rng(42)
    >>> df = pd.DataFrame({
    ...     'COUNTRY': ['SG'] * 10 + ['MY'] * 10,
    ...     'CANDIDATE': ['pleasant'] * 20,
    ...     'APPR': rng.uniform(0.5, 1.0, 20),
    ...     'UNDR': rng.uniform(0.5, 1.0, 20)
    ... })
    >>> result = mann_whitney_test(df, ['APPR', 'UNDR'], 'pleasant')
    >>> len(result) == 2  # One row per criterion
    True

    """
    columns = [
        "PAQ", "CRITERION", "CANDIDATE", "statistic", "pvalue", "adjusted_pvalue",
    ]

    # The rows and group labels of a candidate do not depend on the criterion,
    # so resolve them once instead of once per criterion.
    per_candidate = []
    for candidate in df["CANDIDATE"].unique():
        candidate_data = df[df["CANDIDATE"] == candidate]
        # NaN is not a real group and never matches an equality filter.
        groups = candidate_data[group_var].dropna().unique()
        per_candidate.append((candidate, candidate_data, groups))

    results = []
    for criterion in criteria:
        for candidate, candidate_data, groups in per_candidate:
            if len(groups) != MANN_WHITNEY_NUM_GROUPS:
                logger.warning(
                    f"Skipping {candidate} for {criterion}: "
                    f"Expected {MANN_WHITNEY_NUM_GROUPS} groups, found {len(groups)}"
                )
                continue

            membership = candidate_data[group_var]
            group1 = candidate_data.loc[membership == groups[0], criterion]
            group2 = candidate_data.loc[membership == groups[1], criterion]

            # Perform Mann-Whitney U test
            statistic, pvalue = stats.mannwhitneyu(
                group1, group2, alternative="two-sided"
            )

            # NOTE(review): the factor is the number of groups (2), not the
            # number of comparisons that a Bonferroni correction normally
            # uses. Kept unchanged because callers and the existing tests
            # rely on it; confirm against the reference SATP analysis.
            adjusted_pvalue = min(pvalue * MANN_WHITNEY_NUM_GROUPS, 1.0)

            results.append({
                "PAQ": paq_attribute,
                "CRITERION": criterion,
                "CANDIDATE": candidate,
                "statistic": statistic,
                "pvalue": pvalue,
                "adjusted_pvalue": adjusted_pvalue,
            })

    # Passing the columns keeps the schema stable when every pair was skipped.
    result_df = pd.DataFrame(results, columns=columns)
    logger.debug(f"Performed Mann-Whitney test for {paq_attribute}")
    return result_df
if __name__ == "__main__":
    # Run this module's docstring examples when the file is executed directly.
    from xdoctest import doctest_module

    doctest_module(__file__)
class TestComputeMainAxisCriteria:
    """Test suite for compute_main_axis_criteria function."""

    @staticmethod
    def _survey_frame(with_country=False):
        """Return the two-row 'pleasant' survey sample shared by several tests."""
        data = {
            'APPR': [8.0, 9.0],
            'UNDR': [7.5, 8.5],
            'ANTO': [8.0, 7.0],
            'BIAS': [5.0, 4.5],
            'ASSOCCW': [6.0, 5.5],
            'IMPCCW': [4.0, 3.5],
            'ASSOCW': [7.0, 6.5],
            'IMPCW': [5.0, 4.5],
            'CANDIDATE': ['pleasant', 'pleasant'],
        }
        if with_country:
            data['COUNTRY'] = ['SG', 'MY']
        return pd.DataFrame(data)

    @staticmethod
    def _neutral_row(**overrides):
        """Return one row rated 5.0 everywhere, with the given ratings replaced."""
        ratings = dict.fromkeys(
            ['APPR', 'UNDR', 'ANTO', 'BIAS', 'ASSOCCW', 'IMPCCW', 'ASSOCW', 'IMPCW'],
            5.0,
        )
        ratings.update(overrides)
        data = {name: [value] for name, value in ratings.items()}
        data['CANDIDATE'] = ['test']
        return pd.DataFrame(data)

    def test_basic_computation(self):
        """Test basic computation of main axis criteria."""
        result = satp_testing.compute_main_axis_criteria(self._survey_frame())

        # Check normalization of 0-10 scale to 0-1 (approx: results are floats)
        assert result['APPR'].iloc[0] == pytest.approx(0.8)
        assert result['UNDR'].iloc[0] == pytest.approx(0.75)
        assert result['ANTO'].iloc[0] == pytest.approx(0.8)

        # Check computed criteria
        for criterion in ('CLAR', 'ORTH', 'NCON', 'IBAL'):
            assert criterion in result.columns

        # Check that CANDIDATE column is preserved
        assert 'CANDIDATE' in result.columns

    def test_clar_computation(self):
        """Test CLAR (clarity) computation formula."""
        df = self._neutral_row(ASSOCW=6.0, ASSOCCW=4.0)

        result = satp_testing.compute_main_axis_criteria(df)
        # CLAR = 1 - 0.5*(6.0/10) - 0.5*(4.0/10) = 1 - 0.3 - 0.2 = 0.5
        assert result['CLAR'].iloc[0] == pytest.approx(0.5)

    def test_orth_computation(self):
        """Test ORTH (orthogonality) computation formula."""
        result = satp_testing.compute_main_axis_criteria(self._neutral_row())
        # ORTH = 1 - 2*|5.0/10 - 0.5| = 1 - 2*|0.5 - 0.5| = 1.0
        assert result['ORTH'].iloc[0] == pytest.approx(1.0)

        # Test with bias
        result = satp_testing.compute_main_axis_criteria(self._neutral_row(BIAS=7.0))
        # ORTH = 1 - 2*|7.0/10 - 0.5| = 1 - 2*0.2 = 0.6
        assert result['ORTH'].iloc[0] == pytest.approx(0.6)

    def test_ncon_computation(self):
        """Test NCON (non-confusability) computation formula."""
        df = self._neutral_row(IMPCW=6.0, IMPCCW=4.0)

        result = satp_testing.compute_main_axis_criteria(df)
        # NCON = 1 - 0.5*(6.0/10 + 4.0/10) = 1 - 0.5*1.0 = 0.5
        assert result['NCON'].iloc[0] == pytest.approx(0.5)

    def test_ibal_computation(self):
        """Test IBAL (importance balance) computation formula."""
        df = self._neutral_row(IMPCW=4.0, IMPCCW=6.0)

        result = satp_testing.compute_main_axis_criteria(df)
        # IBAL = 1 - |6.0/10 - 4.0/10| = 1 - 0.2 = 0.8
        assert result['IBAL'].iloc[0] == pytest.approx(0.8)

    def test_preserves_other_columns(self):
        """Test that non-computed columns are preserved."""
        df = self._survey_frame(with_country=True)

        result = satp_testing.compute_main_axis_criteria(df)
        assert 'COUNTRY' in result.columns
        assert result['COUNTRY'].tolist() == ['SG', 'MY']
class TestComputeDerivedAxisCriteria:
    """Test suite for compute_derived_axis_criteria function."""

    @staticmethod
    def _neutral_row(**overrides):
        """Return one row rated 5.0 everywhere, with the given ratings replaced."""
        ratings = dict.fromkeys(
            ['APPR', 'UNDR', 'ASSOCCW', 'IMPCCW', 'ASSOCW', 'IMPCW'], 5.0
        )
        ratings.update(overrides)
        data = {name: [value] for name, value in ratings.items()}
        data['CANDIDATE'] = ['test']
        return pd.DataFrame(data)

    def test_basic_computation(self):
        """Test basic computation of derived axis criteria."""
        df = pd.DataFrame({
            'APPR': [8.0, 9.0],
            'UNDR': [7.5, 8.5],
            'ASSOCCW': [6.0, 5.5],
            'IMPCCW': [4.0, 3.5],
            'ASSOCW': [7.0, 6.5],
            'IMPCW': [5.0, 4.5],
            'CANDIDATE': ['vibrant', 'vibrant'],
        })

        result = satp_testing.compute_derived_axis_criteria(df)

        # Check normalization (approx: results are floats)
        assert result['APPR'].iloc[0] == pytest.approx(0.8)
        assert result['UNDR'].iloc[0] == pytest.approx(0.75)

        # Check computed criteria
        for criterion in ('CLAR', 'CONN', 'IBAL'):
            assert criterion in result.columns

        # Should not have main-axis-only criteria
        for criterion in ('ANTO', 'ORTH', 'NCON'):
            assert criterion not in result.columns

    def test_conn_computation(self):
        """Test CONN (connectedness) computation formula."""
        df = self._neutral_row(IMPCW=6.0, IMPCCW=4.0)

        result = satp_testing.compute_derived_axis_criteria(df)
        # CONN = 0.5*(6.0/10 + 4.0/10) = 0.5*1.0 = 0.5
        assert result['CONN'].iloc[0] == pytest.approx(0.5)

    def test_clar_computation_derived(self):
        """Test CLAR computation for derived axis (same as main axis)."""
        df = self._neutral_row(ASSOCW=6.0, ASSOCCW=4.0)

        result = satp_testing.compute_derived_axis_criteria(df)
        # CLAR = 1 - 0.5*(6.0/10) - 0.5*(4.0/10) = 1 - 0.3 - 0.2 = 0.5
        assert result['CLAR'].iloc[0] == pytest.approx(0.5)
class TestSummarizeMainAxis:
    """Test suite for summarize_main_axis function."""

    def test_summary_without_country(self):
        """Means are taken per candidate when country grouping is off."""
        scores = {
            'APPR': [0.8, 0.85, 0.75, 0.8],
            'UNDR': [0.75, 0.8, 0.7, 0.75],
            'CLAR': [0.6, 0.65, 0.55, 0.6],
            'ANTO': [0.8, 0.85, 0.75, 0.8],
            'ORTH': [0.9, 0.95, 0.85, 0.9],
            'NCON': [0.7, 0.75, 0.65, 0.7],
            'IBAL': [0.85, 0.9, 0.8, 0.85],
        }
        labels = {
            'CANDIDATE': ['pleasant', 'pleasant', 'annoying', 'annoying'],
            'COUNTRY': ['SG', 'MY', 'SG', 'MY'],
        }
        frame = pd.DataFrame({**labels, **scores})

        summary = satp_testing.summarize_main_axis(frame, by_country=False)

        # One row per candidate
        assert len(summary) == 2
        assert set(summary['CANDIDATE']) == {'pleasant', 'annoying'}

        # Mean of the two 'pleasant' APPR scores: (0.8 + 0.85) / 2
        pleasant_appr = summary.set_index('CANDIDATE').loc['pleasant', 'APPR']
        assert pleasant_appr == pytest.approx(0.825)

    def test_summary_with_country(self):
        """Means are taken per country and candidate when grouping is on."""
        scores = {
            'APPR': [0.8, 0.82, 0.85, 0.87],
            'UNDR': [0.75, 0.77, 0.8, 0.82],
            'CLAR': [0.6, 0.62, 0.65, 0.67],
            'ANTO': [0.8, 0.82, 0.85, 0.87],
            'ORTH': [0.9, 0.92, 0.95, 0.97],
            'NCON': [0.7, 0.72, 0.75, 0.77],
            'IBAL': [0.85, 0.87, 0.9, 0.92],
        }
        labels = {
            'CANDIDATE': ['pleasant'] * 4,
            'COUNTRY': ['SG', 'SG', 'MY', 'MY'],
        }
        frame = pd.DataFrame({**labels, **scores})

        summary = satp_testing.summarize_main_axis(frame, by_country=True)

        # One row per country-candidate combination
        assert len(summary) == 2
        assert 'COUNTRY' in summary.columns

        # Mean of the two SG APPR scores: (0.8 + 0.82) / 2
        sg_appr = summary.set_index('COUNTRY').loc['SG', 'APPR']
        assert sg_appr == pytest.approx(0.81)
class TestSummarizeDerivedAxis:
    """Test suite for summarize_derived_axis function."""

    def test_summary_without_country(self):
        """Means are taken per candidate when country grouping is off."""
        scores = {
            'APPR': [0.8, 0.85, 0.75, 0.8],
            'UNDR': [0.75, 0.8, 0.7, 0.75],
            'CLAR': [0.6, 0.65, 0.55, 0.6],
            'CONN': [0.7, 0.75, 0.65, 0.7],
            'IBAL': [0.85, 0.9, 0.8, 0.85],
        }
        labels = {
            'CANDIDATE': ['vibrant', 'vibrant', 'calm', 'calm'],
            'COUNTRY': ['SG', 'MY', 'SG', 'MY'],
        }
        frame = pd.DataFrame({**labels, **scores})

        summary = satp_testing.summarize_derived_axis(frame, by_country=False)

        # One row per candidate
        assert len(summary) == 2
        assert set(summary['CANDIDATE']) == {'vibrant', 'calm'}

        # CONN is reported; the main-axis-only criteria are not
        assert 'CONN' in summary.columns
        for main_only in ('ANTO', 'ORTH', 'NCON'):
            assert main_only not in summary.columns
class TestKruskalWallisTest:
    """Test suite for kruskal_wallis_test function."""

    MAIN_CRITERIA = ('APPR', 'UNDR', 'CLAR', 'ANTO', 'ORTH', 'NCON', 'IBAL')
    DERIVED_CRITERIA = ('APPR', 'UNDR', 'CLAR', 'CONN', 'IBAL')

    @staticmethod
    def _random_scores(criteria, seed=42):
        """Return 3 candidates x 10 rows of uniform scores for ``criteria``."""
        # A local Generator keeps the test reproducible without touching the
        # global NumPy random state shared with other tests.
        rng = np.random.default_rng(seed)
        data = {'CANDIDATE': ['A'] * 10 + ['B'] * 10 + ['C'] * 10}
        for name in criteria:
            data[name] = rng.uniform(0.4, 1.0, 30)
        return pd.DataFrame(data)

    def test_main_axis_test(self):
        """Test Kruskal-Wallis test for main axis."""
        df = self._random_scores(self.MAIN_CRITERIA)

        result = satp_testing.kruskal_wallis_test(df, axis_type="main")

        # Should have one row per criterion
        assert len(result) == 7

        # Check columns exist
        for column in ('CRITERION', 'statistic', 'pvalue', 'effect_size'):
            assert column in result.columns

        # All criteria should be present
        assert set(result['CRITERION']) == set(self.MAIN_CRITERIA)

    def test_derived_axis_test(self):
        """Test Kruskal-Wallis test for derived axis."""
        df = self._random_scores(self.DERIVED_CRITERIA)

        result = satp_testing.kruskal_wallis_test(df, axis_type="derived")

        # Should have one row per criterion (5 for derived axis)
        assert len(result) == 5
        assert set(result['CRITERION']) == set(self.DERIVED_CRITERIA)

    def test_known_statistic_and_effect_size(self):
        """Test H and the effect size on a case with a closed-form answer."""
        # Three fully separated groups of three, no ties: rank sums 6, 15, 24.
        # H = 12 / (9 * 10) * (6**2 + 15**2 + 24**2) / 3 - 3 * 10 = 7.2
        # effect size = (H - k + 1) / (n - k) = (7.2 - 3 + 1) / (9 - 3)
        scores = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
        data = {'CANDIDATE': ['A'] * 3 + ['B'] * 3 + ['C'] * 3}
        data.update({name: scores for name in self.DERIVED_CRITERIA})

        result = satp_testing.kruskal_wallis_test(
            pd.DataFrame(data), axis_type="derived"
        )

        assert result['statistic'].tolist() == pytest.approx([7.2] * 5)
        assert result['effect_size'].tolist() == pytest.approx([5.2 / 6] * 5)

    def test_invalid_axis_type(self):
        """Test that invalid axis type raises ValueError."""
        df = pd.DataFrame({
            'CANDIDATE': ['A', 'B'],
            'APPR': [0.5, 0.6],
        })

        with pytest.raises(
            ValueError, match="axis_type must be either 'main' or 'derived'"
        ):
            satp_testing.kruskal_wallis_test(df, axis_type="invalid")
class TestMannWhitneyTest:
    """Test suite for mann_whitney_test function."""

    @staticmethod
    def _two_country_frame(candidates, criteria, seed=42):
        """Return 10 SG + 10 MY rows of uniform scores for each candidate."""
        # A local Generator keeps the test reproducible without touching the
        # global NumPy random state shared with other tests.
        rng = np.random.default_rng(seed)
        n_rows = 20 * len(candidates)
        data = {
            'COUNTRY': (['SG'] * 10 + ['MY'] * 10) * len(candidates),
            'CANDIDATE': [name for name in candidates for _ in range(20)],
        }
        for criterion in criteria:
            data[criterion] = rng.uniform(0.5, 1.0, n_rows)
        return pd.DataFrame(data)

    def test_basic_test(self):
        """Test Mann-Whitney test with two countries."""
        df = self._two_country_frame(['pleasant'], ['APPR', 'UNDR'])

        result = satp_testing.mann_whitney_test(
            df, ['APPR', 'UNDR'], 'pleasant'
        )

        # Should have one row per criterion
        assert len(result) == 2

        # Check columns
        expected_columns = (
            'PAQ', 'CRITERION', 'CANDIDATE', 'statistic', 'pvalue', 'adjusted_pvalue'
        )
        for column in expected_columns:
            assert column in result.columns

        # Check PAQ attribute
        assert all(result['PAQ'] == 'pleasant')

    def test_multiple_candidates(self):
        """Test Mann-Whitney test with multiple candidates."""
        df = self._two_country_frame(['pleasant', 'annoying'], ['APPR'])

        result = satp_testing.mann_whitney_test(df, ['APPR'], 'test')

        # Should have one row per candidate
        assert len(result) == 2
        assert set(result['CANDIDATE']) == {'pleasant', 'annoying'}

    def test_bonferroni_correction(self):
        """Test that Bonferroni correction is applied."""
        df = self._two_country_frame(['pleasant'], ['APPR'])

        result = satp_testing.mann_whitney_test(df, ['APPR'], 'pleasant')

        # Adjusted p-value should be 2x original (or capped at 1.0)
        pvalue = result['pvalue'].iloc[0]
        adjusted = result['adjusted_pvalue'].iloc[0]
        assert adjusted == pytest.approx(min(pvalue * 2, 1.0))

    def test_skips_candidate_without_two_groups(self):
        """Test that a candidate seen in a single country yields no row."""
        df = pd.DataFrame({
            'COUNTRY': ['SG'] * 4,
            'CANDIDATE': ['pleasant'] * 4,
            'APPR': [0.6, 0.7, 0.8, 0.9],
        })

        result = satp_testing.mann_whitney_test(df, ['APPR'], 'pleasant')

        assert len(result) == 0
class TestIntegration:
    """Integration tests for the complete workflow."""

    N_PER_CANDIDATE = 5

    @classmethod
    def _mock_survey(cls, candidates, rating_ranges, seed=42):
        """
        Build mock raw survey data for two candidates in two countries.

        ``rating_ranges`` maps each raw rating column to the (low, high)
        bounds of the uniform distribution it is drawn from.
        """
        first, second = candidates
        rows_per_candidate = cls.N_PER_CANDIDATE * 2
        n_rows = rows_per_candidate * 2
        # A local Generator keeps the test reproducible without touching the
        # global NumPy random state shared with other tests.
        rng = np.random.default_rng(seed)

        data = {'COUNTRY': ['SG', 'MY'] * (cls.N_PER_CANDIDATE * 2)}
        for column, (low, high) in rating_ranges.items():
            data[column] = rng.uniform(low, high, n_rows)
        data['CANDIDATE'] = [first] * rows_per_candidate + [second] * rows_per_candidate
        return pd.DataFrame(data)

    def test_main_axis_workflow(self):
        """Test complete workflow for main axis."""
        df = self._mock_survey(
            ('pleasant', 'annoying'),
            {
                'APPR': (7, 10), 'UNDR': (7, 10), 'ANTO': (7, 10), 'BIAS': (4, 6),
                'ASSOCCW': (3, 7), 'IMPCCW': (3, 7),
                'ASSOCW': (3, 7), 'IMPCW': (3, 7),
            },
        )

        # Step 1: Compute criteria
        computed = satp_testing.compute_main_axis_criteria(df)
        assert 'CLAR' in computed.columns
        assert 'ORTH' in computed.columns

        # Step 2: Summarize
        summary = satp_testing.summarize_main_axis(computed, by_country=False)
        assert len(summary) == 2  # Two candidates

        # Step 3: Statistical test
        result = satp_testing.kruskal_wallis_test(computed, axis_type="main")
        assert len(result) == 7

    def test_derived_axis_workflow(self):
        """Test complete workflow for derived axis."""
        df = self._mock_survey(
            ('vibrant', 'calm'),
            {
                'APPR': (7, 10), 'UNDR': (7, 10),
                'ASSOCCW': (3, 7), 'IMPCCW': (3, 7),
                'ASSOCW': (3, 7), 'IMPCW': (3, 7),
            },
        )

        # Step 1: Compute criteria
        computed = satp_testing.compute_derived_axis_criteria(df)
        assert 'CLAR' in computed.columns
        assert 'CONN' in computed.columns

        # Step 2: Summarize
        summary = satp_testing.summarize_derived_axis(computed, by_country=True)
        assert len(summary) == 4  # Two countries x two candidates

        # Step 3: Statistical test
        result = satp_testing.kruskal_wallis_test(computed, axis_type="derived")
        assert len(result) == 5