diff --git a/.pylintrc b/.pylintrc
index 575de75..bb63363 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -3,5 +3,8 @@
 max-line-length=256
 max-module-lines=65536
 good-names=x,y,z,A,t,df,cv,e
+[DESIGN]
+max-public-methods=25
+
 [MESSAGES CONTROL]
 disable=import-outside-toplevel,too-many-arguments,logging-fstring-interpolation,use-maxsplit-arg,fixme,cyclic-import,too-many-positional-arguments,abstract-method
diff --git a/doc/news/apply-scaling-factor.rst b/doc/news/apply-scaling-factor.rst
new file mode 100644
index 0000000..955eb87
--- /dev/null
+++ b/doc/news/apply-scaling-factor.rst
@@ -0,0 +1,5 @@
+**Added:**
+
+* Added ``Entry.apply_scaling_factor`` which multiplies a column by a given value and tracks the cumulative scaling factor in the field metadata.
+* Added ``EchemdbEntry.scan_rate`` property returning the scan rate as an astropy quantity from ``figureDescription.scanRate``.
+* Added ``EchemdbEntry.rescale_scan_rate`` which rescales the current (density) axis by the ratio of a new scan rate to the original one, effectively applying a scaling factor.
diff --git a/doc/usage/echemdb_usage.md b/doc/usage/echemdb_usage.md
index 3e30c88..251ea76 100644
--- a/doc/usage/echemdb_usage.md
+++ b/doc/usage/echemdb_usage.md
@@ -122,6 +122,36 @@ The field descriptions are updated accordingly.
 
 ```{code-cell} ipython3
 original_entry.fields
 ```
 
+### Scan rate
+
+The scan rate used to record the data is accessible as an astropy quantity.
+
+```{code-cell} ipython3
+entry.scan_rate
+```
+
+### Rescaling the scan rate
+
+CVs are often recorded at different scan rates. The `rescale_scan_rate` method rescales the `j` (or `I`) axis by the ratio of a given scan rate to the original one, which allows a more direct comparison of the data and provides information on transport and kinetic effects. Essentially, this applies a scaling factor to the `j` (or `I`) column, which is tracked in the field metadata.
+
+```{code-cell} ipython3
+rescaled_sr = entry.rescale_scan_rate(value=100, unit='mV / s')
+rescaled_sr.df.head()
+```
+
+The scaling factor is stored in the field description.
+
+```{code-cell} ipython3
+rescaled_sr.resource.schema.get_field('j')
+```
+
+A custom field name can be provided if the current axis has a different name.
+
+```{code-cell} ipython3
+rescaled_sr_custom = entry.rescale_scan_rate('j', value=0.1, unit='V / s')
+rescaled_sr_custom.df.head()
+```
+
 ### Shifting reference scales
 
 A key issue for comparing electrochemical current potential traces is that data can be recorded with different reference electrodes. Hence direct comparison of the potential data is not straight forward unless the data is shifted to a common reference scale. The shift to a different reference scale depends on how the value of that reference electrode vs the standard hydrogen electrode (SHE) is determined and sometimes depends on the source of the reported data.
diff --git a/doc/usage/unitpackage_usage.md b/doc/usage/unitpackage_usage.md
index 7fdf92c..12d7db6 100644
--- a/doc/usage/unitpackage_usage.md
+++ b/doc/usage/unitpackage_usage.md
@@ -215,6 +215,19 @@ The offset is indicated in the field descriptions. For subsequent offsets, the v
 offset_entry.resource.schema.get_field('E')
 ```
 
+A scaling factor can be applied with `entry.apply_scaling_factor()`, which multiplies a column by a given value.
+
+```{code-cell} ipython3
+scaled_entry = entry.apply_scaling_factor('j', 2)
+scaled_entry.df.head()
+```
+
+The scaling factor is tracked in the field descriptions. For subsequent scaling factors, the cumulative value is stored.
+
+```{code-cell} ipython3
+scaled_entry.resource.schema.get_field('j')
+```
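+
+For example (a minimal sketch reusing the `scaled_entry` from above), applying a further factor of 3 should leave a cumulative value of 6 in the field description.
+
+```{code-cell} ipython3
+# apply a second scaling factor on top of the factor of 2 above
+twice_scaled_entry = scaled_entry.apply_scaling_factor('j', 3)
+twice_scaled_entry.resource.schema.get_field('j')
+```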
+
 To add a computed column with proper field descriptions, use `entry.add_columns()`. This ensures that the field metadata (such as units) is tracked correctly.
diff --git a/unitpackage/database/echemdb_entry.py b/unitpackage/database/echemdb_entry.py
index 62b0d92..7cd12df 100644
--- a/unitpackage/database/echemdb_entry.py
+++ b/unitpackage/database/echemdb_entry.py
@@ -260,6 +260,98 @@ def rescale(self, units):
 
         return super().rescale(units)
 
+    @property
+    def scan_rate(self):
+        r"""
+        Return the scan rate of the entry as an astropy quantity.
+
+        The scan rate is retrieved from the entry's metadata
+        at ``figureDescription.scanRate``.
+
+        EXAMPLES::
+
+            >>> entry = EchemdbEntry.create_examples()[0]
+            >>> entry.scan_rate
+            <Quantity 50. mV / s>
+
+            >>> from unitpackage.database.echemdb import Echemdb
+            >>> db = Echemdb.create_example()
+            >>> db['engstfeld_2018_polycrystalline_17743_f4b_1'].scan_rate
+
+        """
+        return self.figureDescription.scanRate.quantity
+
+    def rescale_scan_rate(self, field_name=None, *, value, unit):
+        r"""
+        Return a new :class:`~unitpackage.database.echemdb_entry.EchemdbEntry`
+        whose current (``I``) or current density (``j``) axis is rescaled
+        by the ratio of the provided scan rate to the original scan rate.
+
+        Since the current (density) scales linearly with the scan rate in cyclic voltammetry,
+        this method multiplies the ``j`` (or ``I``) column by ``new_scan_rate / original_scan_rate``.
+        The scaling factor is tracked in the field metadata.
+
+        By default, the ``j`` (or ``I``) field is rescaled. A custom ``field_name``
+        can be provided if the current axis has a different name.
+
+        EXAMPLES::
+
+            >>> entry = EchemdbEntry.create_examples()[0]
+            >>> entry.scan_rate
+            <Quantity 50. mV / s>
+
+            >>> entry.df.head() # doctest: +NORMALIZE_WHITESPACE
+                  t         E         j
+            0  0.00 -0.103158 -0.998277
+            1  0.02 -0.102158 -0.981762
+            ...
+
+        Rescale from 50 mV/s to 100 mV/s (factor of 2)::
+
+            >>> rescaled = entry.rescale_scan_rate(value=100, unit='mV / s')
+            >>> rescaled.df # doctest: +NORMALIZE_WHITESPACE
+                      t         E         j
+            0  0.000000 -0.103158 -1.996553
+            1  0.020000 -0.102158 -1.963524
+            ...
+
+            >>> rescaled.resource.schema.get_field('j') # doctest: +NORMALIZE_WHITESPACE
+            {'name': 'j',
+             'type': 'number',
+             'unit': 'A / m2',
+             'scalingFactor': {'value': 2.0}}
+
+        The scan rate can be given in any compatible unit
+        (0.1 V/s corresponds to 100 mV/s)::
+
+            >>> rescaled2 = entry.rescale_scan_rate(value=0.1, unit='V / s')
+            >>> rescaled2.df # doctest: +NORMALIZE_WHITESPACE
+                      t         E         j
+            0  0.000000 -0.103158 -1.996553
+            1  0.020000 -0.102158 -1.963524
+            ...
+
+        A custom field name can be provided::
+
+            >>> rescaled3 = entry.rescale_scan_rate('j', value=100, unit='mV / s')
+            >>> rescaled3.df # doctest: +NORMALIZE_WHITESPACE
+                      t         E         j
+            0  0.000000 -0.103158 -1.996553
+            1  0.020000 -0.102158 -1.963524
+            ...
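+
+        As a further sketch (assuming, as above, an original scan rate of 50 mV/s),
+        rescaling to a slower scan rate of 25 mV/s should store a factor of
+        ``25 / 50 = 0.5``::
+
+            >>> halved = entry.rescale_scan_rate(value=25, unit='mV / s')
+            >>> halved.resource.schema.get_field('j') # doctest: +NORMALIZE_WHITESPACE
+            {'name': 'j',
+             'type': 'number',
+             'unit': 'A / m2',
+             'scalingFactor': {'value': 0.5}}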
+
+        """
+        import astropy.units as u
+
+        original_scan_rate = self.scan_rate
+        # Convert the requested scan rate to the unit of the original scan rate.
+        new_scan_rate = (value * u.Unit(unit)).to(original_scan_rate.unit)
+
+        # The current (density) scales linearly with the scan rate.
+        scaling_factor = (new_scan_rate / original_scan_rate).decompose().value
+
+        field_name = field_name or self._normalize_field_name("j")
+
+        return self.apply_scaling_factor(field_name, scaling_factor)
+
     def _normalize_field_name(self, field_name):
         r"""
         Return the name of a field name of the `unitpackage` resource.
diff --git a/unitpackage/entry.py b/unitpackage/entry.py
index 625c7c1..ca721dc 100644
--- a/unitpackage/entry.py
+++ b/unitpackage/entry.py
@@ -254,7 +254,8 @@ def __dir__(self):
 
             >>> entry = Entry.create_examples()[0]
             >>> dir(entry) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-            [... 'create_examples', 'default_metadata_key', 'df', 'echemdb', 'field_unit',
+            [... 'add_offset', 'apply_scaling_factor', 'create_examples', 'default_metadata_key',
+            'df', 'echemdb', 'field_unit',
             'fields', 'from_csv', 'from_df', 'from_local', 'identifier', 'load_metadata',
             'metadata', 'plot', 'remove_column', 'remove_columns', 'rename_field',
             'rename_fields', 'rescale', 'resource', 'save', 'update_fields', 'yaml']
@@ -574,6 +575,90 @@ def add_offset(self, field_name=None, offset=None, unit=""):
 
         return type(self)(resource=new_resource)
 
+    def apply_scaling_factor(self, field_name=None, scaling_factor=None):
+        r"""
+        Return an entry with a ``scaling_factor`` applied to a specified field.
+        The scaling factor is stored in the field's metadata.
+
+        If scaling factors are applied consecutively, the value is updated
+        (i.e., the cumulative scaling factor is the product of the individual factors).
+
+        EXAMPLES::
+
+            >>> from unitpackage.entry import Entry
+            >>> entry = Entry.create_examples()[0]
+            >>> entry.df.head() # doctest: +NORMALIZE_WHITESPACE
+                  t         E         j
+            0  0.00 -0.103158 -0.998277
+            1  0.02 -0.102158 -0.981762
+            ...
+
+            >>> new_entry = entry.apply_scaling_factor('j', 2)
+            >>> new_entry.df.head() # doctest: +NORMALIZE_WHITESPACE
+                  t         E         j
+            0  0.00 -0.103158 -1.996553
+            1  0.02 -0.102158 -1.963524
+            ...
+
+            >>> new_entry.resource.schema.get_field('j') # doctest: +NORMALIZE_WHITESPACE
+            {'name': 'j',
+             'type': 'number',
+             'unit': 'A / m2',
+             'scalingFactor': {'value': 2.0}}
+
+        A consecutively applied scaling factor::
+
+            >>> new_entry_1 = new_entry.apply_scaling_factor('j', 3)
+            >>> new_entry_1.df.head() # doctest: +NORMALIZE_WHITESPACE
+                  t         E         j
+            0  0.00 -0.103158 -5.989660
+            1  0.02 -0.102158 -5.890572
+            ...
+
+            >>> new_entry_1.resource.schema.get_field('j') # doctest: +NORMALIZE_WHITESPACE
+            {'name': 'j',
+             'type': 'number',
+             'unit': 'A / m2',
+             'scalingFactor': {'value': 6.0}}
+
+        Scaling by a float::
+
+            >>> new_entry_2 = entry.apply_scaling_factor('E', 1e3)
+            >>> new_entry_2.df.head() # doctest: +NORMALIZE_WHITESPACE
+                  t           E         j
+            0  0.00 -103.158422 -0.998277
+            1  0.02 -102.158422 -0.981762
+            ...
+
+        """
+        if scaling_factor is None:
+            raise ValueError("A scaling_factor must be provided.")
+
+        if scaling_factor == 0:
+            raise ValueError("A scaling_factor of 0 is not allowed.")
+
+        field = self.resource.schema.get_field(field_name)
+
+        # Create a new dataframe with scaled values
+        df = self.df.copy()
+        df[field_name] *= scaling_factor
+
+        # Calculate the cumulative scaling factor
+        old_scaling_factor = field.custom.get("scalingFactor", {}).get("value", 1)
+        new_scaling_factor = float(old_scaling_factor * scaling_factor)
+
+        # Create new resource with scaling factor metadata
+        field_updates = {
+            field_name: {
+                "scalingFactor": {
+                    "value": new_scaling_factor,
+                }
+            }
+        }
+        new_resource = self._create_new_df_resource(df, field_updates=field_updates)
+
+        return type(self)(resource=new_resource)
+
     def _create_new_df_resource(self, df, schema=None, field_updates=None):
         r"""
         Create a new dataframe resource from a dataframe, preserving metadata and schema.