From c31c92c79a82194390803f29b23fb1203293129d Mon Sep 17 00:00:00 2001 From: krzysztofkrzeslak Date: Mon, 24 Mar 2025 23:38:55 +0100 Subject: [PATCH] Added example config with parameters descriptions --- config/odaslive/example_with_descriptions.cfg | 456 ++++++++++++++++++ 1 file changed, 456 insertions(+) create mode 100644 config/odaslive/example_with_descriptions.cfg diff --git a/config/odaslive/example_with_descriptions.cfg b/config/odaslive/example_with_descriptions.cfg new file mode 100644 index 00000000..bf55dd6e --- /dev/null +++ b/config/odaslive/example_with_descriptions.cfg @@ -0,0 +1,456 @@ +# This file is Configuration file is based on MiniDSP_uma8.cfg + +version = "2.1"; + +# Raw + +# Raw audio input configuration for ODAS +raw: { + + # Sampling rate (samples per second). Determines how often the audio is captured. + # Higher sampling rates capture more detail but increase processing load. + fS = 48000; # 48 kHz is a common rate for high-quality audio, especially for microphone arrays. + + # Hop size (number of samples between each frame). + # Smaller hop sizes increase temporal resolution but require more processing power. + hopSize = 512; # Typical values are 256 or 512 for real-time applications. + + # Number of bits per sample. Defines the audio resolution. + # Higher bit depth increases audio quality but also increases data size. + nBits = 32; # 32-bit provides high dynamic range, useful for precise localization. + + # Number of audio channels. Represents the number of microphones in the array. + # Increasing the number of channels improves spatial resolution but requires more processing. + nChannels = 8; # UMA-8 microphone array typically has 8 channels. + + # Input configuration to capture raw signals from the microphone array. + interface: { + + # Input type for the signal. "soundcard" indicates that audio is captured from a physical device. + type = "soundcard"; + + # Soundcard number. Use `arecord -l` to list available soundcards. + # Changing this value selects a different soundcard. + card = 1; + + # Device number within the soundcard. Use `arecord -l` to check the device ID. + device = 0; + + # Audio format of the raw data. "S32_LE" means 32-bit signed little-endian. + # Ensure this matches the microphone's native format, or data might be distorted. + format = "S32_LE"; + + # Sampling rate configuration to match the audio capture rate. + rate = 48000; # Must match `fS` to avoid mismatches in data interpretation. + + # Number of audio channels in the input stream. Must match the array configuration. + channels = 8; # Ensures all microphones are captured. + }; +} + +# Mapping + +mapping: +{ + + # changing the order might affect the spatial arrangement and localization accuracy. + map: (1, 2, 3, 4, 5, 6, 7); + +} + +# General + +# General configuration for ODAS +general: { + + # Epsilon is a small constant added to avoid division by zero or numerical instability. + # Adjusting this value is not recommended as it may lead to computational issues. + epsilon = 1E-20; + + # Frame and hop size configuration + size: { + # hopSize: Number of samples between successive frames (overlap). + # Smaller values provide better temporal resolution but increase computational load. + hopSize = 128; + + # frameSize: Number of samples per frame. + # Larger values provide better frequency resolution but increase latency. + frameSize = 256; + }; + + # Sampling rate configuration (related to input sound and speed of sound calculations) + samplerate: { + # mu: Mean sampling rate in Hz (16 kHz). + # Adjusting this value changes the time resolution of localization. + mu = 16000; + + # sigma2: Variance in sampling rate, should be kept small for stability. + sigma2 = 0.01; + }; + + # Speed of sound configuration + speedofsound: { + # mu: Mean speed of sound in m/s (343 m/s at 20°C). + # Adjusting this value impacts sound source localization accuracy. + # Use environmental measurements if accuracy is critical. + mu = 343.0; + + # sigma2: Variance in speed of sound. + # Higher values make the algorithm more tolerant to sound speed variations but can reduce precision. + sigma2 = 25.0; + }; + + # Microphone geometry configuration + mics = ( + + # Each microphone configuration block contains: + # - mu: Microphone position coordinates (x, y, z) in meters. + # - sigma2: Covariance matrix components (usually zero for fixed positions). + # - direction: Direction vector of the microphone (normalized). + # - angle: Angular range in degrees defining the microphone's sensitivity or acceptance angle. + + # Microphone 0 + { + mu = ( +0.000, +0.000, +0.000 ); + sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 ); + direction = ( +0.000, +0.000, +1.000 ); + angle = ( 90.0, 100.0 ); + }, + + # Microphone 1 + { + mu = ( +0.000, +0.043, +0.000 ); + sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 ); + direction = ( +0.000, +0.000, +1.000 ); + angle = ( 90.0, 100.0 ); + }, + + # Microphone 2 + { + mu = ( +0.037, +0.021, +0.000 ); + sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 ); + direction = ( +0.000, +0.000, +1.000 ); + angle = ( 90.0, 100.0 ); + }, + + # Microphone 3 + { + mu = ( +0.037, -0.021, +0.000 ); + sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 ); + direction = ( +0.000, +0.000, +1.000 ); + angle = ( 90.0, 100.0 ); + }, + + # Microphone 4 + { + mu = ( +0.000, -0.043, +0.000 ); + sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 ); + direction = ( +0.000, +0.000, +1.000 ); + angle = ( 90.0, 100.0 ); + }, + + # Microphone 5 + { + mu = ( -0.037, -0.021, +0.000 ); + sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 ); + direction = ( +0.000, +0.000, +1.000 ); + angle = ( 90.0, 100.0 ); + }, + + # Microphone 6 + { + mu = ( -0.037, +0.021, +0.000 ); + sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 ); + direction = ( +0.000, +0.000, +1.000 ); + angle = ( 90.0, 100.0 ); + } + + ); + + # Spatial filters restrict the search space to a specific region, + # reducing false detections or focusing on a specific area. + spatialfilters = ( + { + # direction: Normalized direction vector defining the spatial filter. + direction = ( +0.000, +0.000, +1.000 ); + + # angle: Angular range (in degrees) of the filter. + # Reducing the angle can limit detection to a narrow cone, reducing false positives. + angle = (90.0, 95.0); + } + ); + + # nThetas: Number of azimuth angles to consider during localization. + # Higher values improve accuracy but increase computation. + nThetas = 181; + + # gainMin: Minimum gain threshold to consider a source. + # Lowering this value makes ODAS more sensitive to quieter sources, but may increase noise. + gainMin = 0.25; + +}; + + + + +ssl:{ + nMatches = 10; + probMin = 0.5; + nRefinedLevels = 1; + interpRate = 4; + + # Number of scans: level is the resolution of the sphere + # and delta is the size of the maximum sliding window + # (delta = -1 means the size is automatically computed) + scans = ( + { level = 2; delta = -1; }, + { level = 4; delta = -1; } + ); + + # Output to export potential sources + potential: { + + format = "json"; + + interface: { + type = "socket"; + ip = "127.0.0.1"; # IP address for data output + port = 9001; # Port for sending JSON data + }; + + }; +} + + +# Stationary Noise Estimation (SNE) configuration +sne: { + + # Number of frequency bins used for noise estimation. + # Higher values give finer frequency resolution but increase computation. + b = 3; + + # Smoothing factor for noise power estimation. + # A lower value makes the noise estimate change slowly, + # while a higher value makes it respond faster to noise variations. + alphaS = 0.1; + + # Frame length for the estimation window. + # Larger values provide more stable noise estimates but can delay adaptation to changing noise conditions. + L = 150; + + # Noise threshold for distinguishing between signal and noise. + # Higher values make it more likely to classify weak signals as noise. + delta = 3.0; + + # Smoothing factor for decision-directed approach. + # Controls the balance between current and past noise estimates. + alphaD = 0.1; + +}; + + +# Sound Source Tracking + +# Sound Source Tracking (SST) configuration +sst: { + + # Mode selection: "kalman" or "particle" + # - "kalman": Efficient and accurate for smooth motion and Gaussian noise. + # - "particle": Suitable for complex, nonlinear, and unpredictable motion. + mode = "particle"; + + # Addition method for new sources: "static" or "dynamic" + # - "static": Fixed number of sources. + # - "dynamic": Detects and tracks emerging sound sources. + add = "dynamic"; + + # Common parameters for both Kalman and particle filters + active = ( + { weight = 1.0; mu = 0.4; sigma2 = 0.0025 } + ); + + inactive = ( + { weight = 1.0; mu = 0.25; sigma2 = 0.0025 } + ); + + # Variance in spatial distribution + sigmaR2_prob = 0.0025; + sigmaR2_active = 0.0225; + sigmaR2_target = 0.0025; + + # Probabilities for source management + Pfalse = 0.1; # Probability of false detection + Pnew = 0.1; # Probability of adding a new source + Ptrack = 0.8; # Probability of maintaining an existing track + + # Thresholds for detecting new and probable sources + theta_new = 0.3; + N_prob = 5; + theta_prob = 0.8; + + # Inactivity handling + N_inactive = ( 150, 150, 150, 150 ); # Time steps before source is considered inactive + theta_inactive = 0.9; + + # Kalman filter-specific parameters + kalman: { + sigmaQ = 0.001; # Process noise covariance, controlling smoothing + }; + + # Particle filter-specific parameters + particle: { + + # Number of particles (samples to model distribution) + nParticles = 1000; + + # Spatial transition parameters + st_alpha = 2.0; # Weight for spatial motion model + st_beta = 0.04; # Variance in spatial motion + st_ratio = 0.5; # Ratio controlling spatial particle spread + + # Velocity transition parameters + ve_alpha = 0.05; # Weight for velocity motion model + ve_beta = 0.2; # Variance in velocity motion + ve_ratio = 0.3; # Ratio controlling velocity particle spread + + # Acceleration transition parameters + ac_alpha = 0.5; # Weight for acceleration motion model + ac_beta = 0.2; # Variance in acceleration motion + ac_ratio = 0.2; # Ratio controlling acceleration particle spread + + # Minimum weight for active tracking + Nmin = 0.7; + + }; + + # List of known targets (empty for dynamic tracking) + target: (); + + # Output to export tracked sources + tracked: { + format = "json"; # Output format for tracked data + + # Communication interface + interface: { + type = "socket"; + ip = "127.0.0.1"; # IP address for data output + port = 9000; # Port for sending JSON data + }; + }; + +}; + + +# Sound Source Separation (SSS) Configuration +sss: { + + # Mode for separation and post-filtering + # - mode_sep: Algorithm used for sound separation (choices: "dds", "dgss", "dmvdr") + # - mode_pf: Post-filtering mode ("ss" - Spectral Subtraction) + mode_sep = "dds"; + mode_pf = "ss"; + + # Gain values for separation and post-filtering + # - gain_sep: Gain applied to the separated signals + # - gain_pf: Gain applied after post-filtering + gain_sep = 1.0; + gain_pf = 10.0; + + # Delay-and-Sum (DDS) mode configuration + dds: { + # DDS is a simple beamforming method that combines signals from multiple microphones + # to enhance the desired sound source. No additional parameters needed. + }; + + # Degenerate Generalized Sidelobe Canceller (DGSS) mode configuration + dgss: { + mu = 0.01; # Adaptation rate for DGSS filter (higher values lead to faster adaptation but less stability) + lambda = 0.5; # Regularization parameter (balances noise reduction and signal preservation) + }; + + # Distance Minimum Variance Distortionless Response (DMVDR) mode configuration + dmvdr: { + # DMVDR is an advanced beamforming method that minimizes output power while maintaining a distortionless response + # for signals from a specified direction. No additional parameters needed. + }; + + # Masking and Spectral Subtraction (MS) configuration + ms: { + alphaPmin = 0.07; # Minimum power update coefficient (reduces noise floor) + eta = 0.5; # Noise overestimation factor (higher values increase noise suppression) + alphaZ = 0.8; # Forgetting factor for noise statistics (smoothing) + thetaWin = 0.3; # Windowing parameter (controls frame overlap) + alphaWin = 0.3; # Smoothing factor for noise estimation + maxAbsenceProb = 0.9;# Maximum probability for source absence (helps remove false positives) + Gmin = 0.01; # Minimum gain for noise suppression + winSizeLocal = 3; # Local window size for noise estimation + winSizeGlobal = 23; # Global window size for noise estimation + winSizeFrame = 256; # Frame size for windowing in noise suppression + }; + + # Spectral Subtraction (SS) post-filtering configuration + ss: { + Gmin = 0.01; # Minimum gain for spectral subtraction (prevents over-suppression) + Gmid = 0.5; # Mid gain level (balance between noise and signal) + Gslope = 10.0; # Gain slope (affects how aggressively noise is reduced) + }; + + # Separated signals output configuration + separated: { + fS = 48000; # Sampling rate in Hz + hopSize = 128; # Hop size for processing frames + nBits = 32; # Bit depth for separated signals + + interface: { + type = "file"; # Output to file + path = "/home/krzysiek/moje/radar/uma8/separated.raw"; # Path for separated audio + }; + }; + + # Post-filtered signals output configuration + postfiltered: { + fS = 48000; # Sampling rate in Hz + hopSize = 128; # Hop size for processing frames + nBits = 32; # Bit depth for post-filtered signals + gain = 10.0; # Gain applied after post-filtering + + interface: { + type = "file"; # Output to file + path = "/home/krzysiek/moje/radar/uma8/postfiltered.raw"; # Path for post-filtered audio + }; + }; +}; + +classify: { + + # Frame size for analysis (number of samples per frame) + frameSize = 4096; + + # Window size for classification smoothing + winSize = 3; + + # Minimum and maximum time delays (in samples) used for localization and classification + tauMin = 88; + tauMax = 551; + + # Maximum allowed change in time delay between frames (used for tracking moving sources) + deltaTauMax = 20; + + # Smoothing and adaptation coefficients + alpha = 0.3; # Smoothing factor for updating classifications (lower values = slower adaptation) + gamma = 0.05; # Threshold factor for classification stability + + # Minimum confidence level for a classification to be accepted + phiMin = 0.5; + + # Threshold for classification stability (ratio of how stable the classification should be) + r0 = 0.2; + # Category output configuration (disabled in this case) + category: { + format = "json"; + interface: { + type = "terminal"; + } + } + +}