diff --git a/pyproject.toml b/pyproject.toml index bdfecd61..2b85620e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -334,19 +334,7 @@ dt = [ # Gen Module - General utilities # Use: pip install scitex[gen] -gen = [ - "ipython", - "h5py", - "pyperclip", - "readchar", - "scipy", - "matplotlib", - "xarray", - "requests", - "joblib", - # # Heavy dependencies handled by _AVAILABLE flags - # "torch", -] +gen = ["scitex-gen>=0.1.0"] # Etc Module - Miscellaneous utilities # Use: pip install scitex[etc] diff --git a/src/scitex/gen/README.md b/src/scitex/gen/README.md deleted file mode 100755 index 411db29b..00000000 --- a/src/scitex/gen/README.md +++ /dev/null @@ -1,71 +0,0 @@ - - -# `scitex.gen` Quick Start Guide - -The `scitex.gen` module is a collection of general-purpose utility functions and classes designed to simplify common programming tasks in data science and machine learning workflows. This guide will introduce you to some of the key functions and show you how to use them with examples. 
- -# [`scitex.gen`](https://github.com/ywatanabe1989/scitex/tree/main/src/scitex/gen/) - -## Quick Start -```python -# Import necessary modules -import scitex -import sys -import numpy as np -import matplotlib.pyplot as plt - -# Initialize the environment using scitex.session.start -# This function sets up logging, fixes random seeds, configures matplotlib, and returns CONFIG and other variables -CONFIG, sys.stdout, sys.stderr, plt, CC = scitex.session.start( - sys, # System module for I/O redirection - plt, # Matplotlib pyplot module for plotting configuration - verbose=True # Set to False to suppress detailed output -) - -# Your main code goes here -# For example, generate some data and plot it -x = np.linspace(0, 2 * np.pi, 100) -y = np.sin(x) - -plt.plot(x, y, label='Sine Wave') -plt.title('Sine Wave Plot') -plt.xlabel('Angle [rad]') -plt.ylabel('Sin(x)') -plt.legend() - -# Save the figure using scitex.io.save -scitex.io.save(plt, 'sine_wave_plot.png') - -# See scitex.plt.subplots to automatic data tracking and saving in a sigmaplot-compatible format - -# Finalize the script using scitex.session.close -# This function handles cleanup tasks, saves configurations, and can send notifications if enabled -scitex.session.close(CONFIG) -``` - -This script demonstrates the basic usage of `scitex.session.start` and `scitex.session.close` for initializing and finalizing your environment when running scripts using the `scitex` package. - -- **`scitex.session.start`**: - - Sets up logging to capture stdout and stderr. - - Fixes random seeds for reproducibility. - - Configures Matplotlib settings. - - Returns a configuration dictionary (`CONFIG`) and other variables for use in your script. - -- **`scitex.session.close`**: - - Handles cleanup tasks such as flushing output streams. - - Saves configuration settings and logs. - - Optionally sends notifications upon script completion. 
- -By wrapping your main code between `scitex.session.start` and `scitex.session.close`, you ensure that your script has a consistent environment and that all resources are properly managed. - -**Note**: Replace `'sine_wave_plot.png'` with your desired file path or name for saving the plot. - - -## Contact -Yusuke Watanabe (ywatanabe@scitex.ai) - -For more information and updates, please visit the [scitex GitHub repository](https://github.com/ywatanabe1989/scitex). diff --git a/src/scitex/gen/_DimHandler.py b/src/scitex/gen/_DimHandler.py deleted file mode 100755 index dccb6a28..00000000 --- a/src/scitex/gen/_DimHandler.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-05 00:39:26 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_DimHandler.py - -""" -This script demonstrates DimHandler, which: -1) Keeps designated dimensions, -2) Permutes the kept dimensions to the last while maintaining their relative order, -3) Reshapes the remaining dimensions to the first, batch dimension, -4) (Performs calculations), -5) Restores the summarized dimensions to their original shapes. -""" - -# Imports -import sys - -import matplotlib.pyplot as plt -import numpy as np -import torch - - -# Functions -class DimHandler: - """ - A utility class for handling dimension manipulations on tensors or arrays, including reshaping and permuting dimensions. - - Attributes: - orig_shape (tuple): The original shape of the input tensor or array before any manipulation. - keepdims (list): The list of dimensions to be kept and moved to the end. - n_non_keepdims (list): The sizes of the dimensions not kept, used for reshaping back to the original shape. - n_keepdims (list): The sizes of the kept dimensions, used for reshaping. 
- - Example1: - import torch - - dh = DimHandler() - x = torch.rand(1, 2, 3, 4, 5, 6) # Example tensor - print(x.shape) # torch.Size([1, 2, 3, 4, 5, 6]) - x = dh.fit(x, keepdims=[0, 2, 5]) - print(x.shape) # torch.Size([40, 1, 3, 6]) - x = dh.unfit(x) - print(x.shape) # torch.Size([2, 4, 5, 1, 3, 6]) - - Example 2: - import torch - - dh = DimHandler() - x = torch.rand(1, 2, 3, 4, 5, 6) # Example tensor - print(x.shape) # torch.Size([1, 2, 3, 4, 5, 6]) - x = dh.fit(x, keepdims=[0, 2, 5]) - print(x.shape) # torch.Size([40, 1, 3, 6]) - y = x.mean(axis=-2) # calculation on the kept dims - print(y.shape) # torch.Size([40, 1, 6]) - y = dh.unfit(y) - print(y.shape) # torch.Size([2, 4, 5, 1, 6]) - """ - - def __init__(self): - pass - # self.orig_shape = None - # self.keepdims = None - - def fit(self, x, keepdims=[]): - if isinstance(x, np.ndarray): - return self._fit_numpy(x, keepdims=keepdims) - elif isinstance(x, torch.Tensor): - return self._fit_torch(x, keepdims=keepdims) - - def _fit_numpy(self, x, keepdims=[]): - """ - Reshapes the input NumPy array by flattening the dimensions not in `keepdims` and moving the kept dimensions to the end. - - Arguments: - x (numpy.ndarray): The input array to be reshaped. - keepdims (list of int): The indices of the dimensions to keep. - - Returns: - x_flattened (numpy.ndarray): The reshaped array with kept dimensions moved to the end. - """ - assert len(keepdims) <= len( - x.shape - ), "keepdims cannot have more dimensions than the array itself." 
- - # Normalize negative indices to positive indices - total_dims = len(x.shape) - keepdims = [dim if dim >= 0 else total_dims + dim for dim in keepdims] - keepdims = sorted(set(keepdims)) - - self.shape_fit = x.shape - - non_keepdims = [ii for ii in range(len(self.shape_fit)) if ii not in keepdims] - - self.n_non_keepdims = [self.shape_fit[nkd] for nkd in non_keepdims] - self.n_keepdims = [self.shape_fit[kd] for kd in keepdims] - - # Permute the array dimensions so that the non-kept dimensions come first - new_order = non_keepdims + keepdims - x_permuted = np.transpose(x, axes=new_order) - - # Flatten the non-kept dimensions - x_flattened = x_permuted.reshape(-1, *self.n_keepdims) - - return x_flattened - - def _fit_torch(self, x, keepdims=[]): - """ - Reshapes the input tensor or array by flattening the dimensions not in `keepdims` and moving the kept dimensions to the end. - - Arguments: - x (torch.Tensor): The input tensor or array to be reshaped. - keepdims (list of int): The indices of the dimensions to keep. - - Returns: - x_flattend (torch.Tensor): The reshaped tensor or array with kept dimensions moved to the end. - - Note: - This method modifies the `orig_shape`, `keepdims`, `n_non_keepdims`, and `n_keepdims` attributes based on the input. - """ - assert len(keepdims) <= len( - x.shape - ), "keepdims cannot have more dimensions than the tensor itself." 
- - keepdims = torch.tensor(keepdims).clone().detach().cpu().int() - # Normalize negative indices to positive indices - total_dims = len(x.shape) - keepdims = [dim if dim >= 0 else total_dims + dim for dim in keepdims] - keepdims = sorted(set(keepdims)) - - self.shape_fit = x.shape - - non_keepdims = [ - int(ii) for ii in torch.arange(len(self.shape_fit)) if ii not in keepdims - ] - - self.n_non_keepdims = [self.shape_fit[nkd] for nkd in non_keepdims] - self.n_keepdims = [self.shape_fit[kd] for kd in keepdims] - - x_permuted = x.permute(*non_keepdims, *keepdims) - x_flattend = x_permuted.reshape(-1, *self.n_keepdims) - - return x_flattend - - def unfit(self, y): - """ - Restores the first dimension of reshaped tensor or array back to its original shape before the `fit` operation. - - Arguments: - y (torch.Tensor or numpy.array): The tensor or array to be restored to its original shape. - - Returns: - y_restored (torch.Tensor or numpy.array): The tensor or array restored to its original shape. 
- """ - self.shape_unfit = y.shape - return y.reshape(*self.n_non_keepdims, *self.shape_unfit[1:]) - - -if __name__ == "__main__": - import scitex - - # Start - CONFIG, sys.stdout, sys.stderr, plt, CC = scitex.session.start(sys, plt) - - # Example1: - scitex.gen.printc("Example 1") - dh = DimHandler() - x = torch.rand(1, 2, 3, 4, 5, 6) # Example tensor - print(x.shape) # torch.Size([1, 2, 3, 4, 5, 6]) - x = dh.fit(x, keepdims=[0, 2, 5]) - print(x.shape) # torch.Size([40, 1, 3, 6]) - x = dh.unfit(x) - print(x.shape) # torch.Size([2, 4, 5, 1, 3, 6]) - - # Example 2: - scitex.gen.printc("Example 2") - dh = DimHandler() - x = torch.rand(1, 2, 3, 4, 5, 6) # Example tensor - print(x.shape) # torch.Size([1, 2, 3, 4, 5, 6]) - x = dh.fit(x, keepdims=[0, 2, 5]) - print(x.shape) # torch.Size([40, 1, 3, 6]) - y = x.mean(axis=-2) # calculation on the kept dims - print(y.shape) # torch.Size([40, 1, 6]) - y = dh.unfit(y) - print(y.shape) # torch.Size([2, 4, 5, 1, 6]) - - # Close - scitex.session.close(CONFIG) - -# EOF - -""" -/ssh:ywatanabe@444:/home/ywatanabe/proj/entrance/scitex/gen/_DimHandler.py -""" - - -# EOF diff --git a/src/scitex/gen/_TimeStamper.py b/src/scitex/gen/_TimeStamper.py deleted file mode 100755 index b8141171..00000000 --- a/src/scitex/gen/_TimeStamper.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "ywatanabe (2024-11-07 16:06:50)" -# File: ./scitex_repo/src/scitex/gen/_TimeStamper.py - -import time -from typing import Optional, Union - -import pandas as pd - - -class TimeStamper: - """ - Functionality: - * Generates timestamps with comments and tracks elapsed time - * Records timestamps in a DataFrame for analysis - * Calculates time differences between timestamps - Input: - * Comments for each timestamp - * Format preference (simple or detailed) - Output: - * Formatted timestamp strings - * DataFrame with timestamp records - * Time differences between specified timestamps - Prerequisites: - * pandas - """ 
- - def __init__(self, is_simple: bool = True) -> None: - self.id: int = -1 - self.start_time: float = time.time() - self._is_simple: bool = is_simple - self._prev: float = self.start_time - self._df_record: pd.DataFrame = pd.DataFrame( - columns=[ - "timestamp", - "elapsed_since_start", - "elapsed_since_prev", - "comment", - "formatted_text", - ] - ) - - def __call__(self, comment: str = "", verbose: bool = False) -> str: - now: float = time.time() - from_start: float = now - self.start_time - from_prev: float = now - self._prev - - formatted_from_start: str = time.strftime("%H:%M:%S", time.gmtime(from_start)) - formatted_from_prev: str = time.strftime("%H:%M:%S", time.gmtime(from_prev)) - - self.id += 1 - self._prev = now - - text: str = ( - f"ID:{self.id} | {formatted_from_start} {comment} | " - if self._is_simple - else f"Time (id:{self.id}): total {formatted_from_start}, prev {formatted_from_prev} [hh:mm:ss]: {comment}\n" - ) - - self._df_record.loc[self.id] = [ - now, - from_start, - from_prev, - comment, - text, - ] - - if verbose: - print(text) - return text - - @property - def record(self) -> pd.DataFrame: - """Returns the record DataFrame without the formatted_text column.""" - return self._df_record[ - [ - "timestamp", - "elapsed_since_start", - "elapsed_since_prev", - "comment", - ] - ] - - def delta(self, id1: int, id2: int) -> float: - """Calculates time difference between two timestamps. 
- - Parameters - ---------- - id1 : int - First timestamp ID - id2 : int - Second timestamp ID - - Returns - ------- - float - Time difference in seconds - - Raises - ------ - ValueError - If IDs don't exist in records - """ - if id1 < 0: - id1 = len(self._df_record) + id1 - if id2 < 0: - id2 = len(self._df_record) + id2 - - if not all(idx in self._df_record.index for idx in [id1, id2]): - raise ValueError("Invalid timestamp ID(s)") - - return ( - self._df_record.loc[id1, "timestamp"] - - self._df_record.loc[id2, "timestamp"] - ) - - -if __name__ == "__main__": - ts = TimeStamper(is_simple=True) - ts("Starting process") - time.sleep(1) - ts("One second later") - time.sleep(2) - ts("Two seconds later") - - -# EOF - -# #!/usr/bin/env python3 -# # -*- coding: utf-8 -*- -# # Time-stamp: "ywatanabe (2024-11-07 16:06:50)" -# # File: ./scitex_repo/src/scitex/gen/_TimeStamper.py - -# import time -# import pandas as pd - - -# class TimeStamper: -# """ -# A class for generating timestamps with optional comments, tracking both the time since object creation and since the last call. 
-# """ - -# def __init__(self, is_simple=True): -# self.id = -1 -# self.start_time = time.time() -# self._is_simple = is_simple -# self._prev = self.start_time -# self._df_record = pd.DataFrame( -# columns=[ -# "timestamp", -# "elapsed_since_start", -# "elapsed_since_prev", -# "comment", -# "formatted_text", -# ] -# ) - -# def __call__(self, comment="", verbose=False): -# now = time.time() -# from_start = now - self.start_time -# from_prev = now - self._prev -# formatted_from_start = time.strftime( -# "%H:%M:%S", time.gmtime(from_start) -# ) -# formatted_from_prev = time.strftime("%H:%M:%S", time.gmtime(from_prev)) -# self.id += 1 -# self._prev = now -# text = ( -# f"ID:{self.id} | {formatted_from_start} {comment} | " -# if self._is_simple -# else f"Time (id:{self.id}): total {formatted_from_start}, prev {formatted_from_prev} [hh:mm:ss]: {comment}\n" -# ) - -# # Update DataFrame directly -# self._df_record.loc[self.id] = [ -# now, -# from_start, -# from_prev, -# comment, -# text, -# ] - -# if verbose: -# print(text) -# return text - -# @property -# def record(self): -# return self._df_record[ -# [ -# "timestamp", -# "elapsed_since_start", -# "elapsed_since_prev", -# "comment", -# ] -# ] - -# def delta(self, id1, id2): -# """ -# Calculate the difference in seconds between two timestamps identified by their IDs. - -# Parameters: -# id1 (int): The ID of the first timestamp. -# id2 (int): The ID of the second timestamp. - -# Returns: -# float: The difference in seconds between the two timestamps. - -# Raises: -# ValueError: If either id1 or id2 is not in the DataFrame index. -# """ -# # Adjust for negative indices, similar to negative list indexing in Python -# if id1 < 0: -# id1 = len(self._df_record) + id1 -# if id2 < 0: -# id2 = len(self._df_record) + id2 - -# # Check if both IDs exist in the DataFrame -# if ( -# id1 not in self._df_record.index -# or id2 not in self._df_record.index -# ): -# raise ValueError( -# "One or both of the IDs do not exist in the record." 
-# ) - -# # Compute the difference in timestamps -# time_diff = ( -# self._df_record.loc[id1, "timestamp"] -# - self._df_record.loc[id2, "timestamp"] -# ) -# return time_diff - - -# if __name__ == "__main__": -# ts = TimeStamper(is_simple=True) -# ts("Starting process") -# time.sleep(1) -# ts("One second later") -# time.sleep(2) -# ts("Two seconds later") - - -# # EOF diff --git a/src/scitex/gen/__init__.py b/src/scitex/gen/__init__.py index 1258f1ca..e9ed00e3 100755 --- a/src/scitex/gen/__init__.py +++ b/src/scitex/gen/__init__.py @@ -1,202 +1,13 @@ -#!/usr/bin/env python3 -"""Scitex gen module. +"""SciTeX gen — thin compatibility shim for scitex-gen.""" -NOTE: This module is being refactored. Many functions are being moved to -more appropriate locations. For backward compatibility, they are re-exported -here with deprecation warnings. +import sys as _sys -Recommended imports: -- ci -> scitex.stats.descriptive.ci -- check_host, is_host, verify_host -> scitex.os -- detect_environment, is_notebook, is_script -> scitex.context -- list_api -> scitex.introspect -- run_shellcommand, run_shellscript -> scitex.sh -- xml2dict, XmlDictConfig, XmlListConfig -> scitex.io -- title_case -> scitex.str -- symlink -> scitex.path -""" - -import warnings - - -def _deprecation_warning(old_path, new_path): - warnings.warn( - f"{old_path} is deprecated, use {new_path} instead", - DeprecationWarning, - stacklevel=3, - ) - - -# ci -> scitex_stats.descriptive.ci (with re-export for backward compat) -from scitex_stats.descriptive import ci - -# Optional: DimHandler requires torch -try: - from ._DimHandler import DimHandler -except ImportError: - DimHandler = None -# check_host moved to scitex.os (re-export for backward compatibility) -from scitex.os import check_host, is_host, verify_host - -from ._alternate_kwarg import alternate_kwarg -from ._cache import cache -from ._deprecated_close import close as _deprecated_close -from ._deprecated_close import running2finished as 
_deprecated_running2finished - -# _start.py moved to old/ directory - functionality now in scitex.session -# BACKWARD COMPATIBILITY: Import deprecated wrappers -from ._deprecated_start import start as _deprecated_start - -# _close.py moved to old/ directory - functionality now in scitex.session -# Optional: _embed requires torch -try: - from ._embed import embed -except ImportError: - embed = None -# list_api moved to scitex.introspect (re-export for backward compatibility) -from scitex.introspect import list_api - -from ._is_ipython import is_ipython, is_script -from ._less import less -from ._list_packages import list_packages, main -from ._mat2py import ( - dir2npy, - keys2npa, - mat2dict, - mat2npa, - mat2npy, - public_keys, - save_npa, -) - -# Optional: _norm requires torch try: - from ._norm import clip_perc, to_01, to_nan01, to_nanz, to_z, unbias -except ImportError: - clip_perc = None - to_01 = None - to_nan01 = None - to_nanz = None - to_z = None - unbias = None -# shell functions moved to scitex.sh (re-export for backward compatibility) -from scitex.sh import run_shellcommand, run_shellscript - -from ._paste import paste -from ._print_config import print_config, print_config_main -from ._src import src -from ._TimeStamper import TimeStamper - -# Override the imported functions with deprecated wrappers -start = _deprecated_start -close = _deprecated_close -running2finished = _deprecated_running2finished - -# environment detection moved to scitex.context (re-export for backward compatibility) -from scitex.context import ( - detect_environment, - get_notebook_directory, - get_notebook_info_simple, - get_notebook_name, - get_notebook_path, - get_output_directory, - is_notebook, -) - -# title_case moved to scitex.str (re-export for backward compatibility) -from scitex.str import title_case - -from ._symlink import symlink -from ._symlog import symlog -from ._title2path import title2path -from ._to_even import to_even -from ._to_odd import to_odd - -# Optional: 
_to_rank requires torch -try: - from ._to_rank import to_rank -except ImportError: - to_rank = None -from ._transpose import transpose - -# Optional: _type and _var_info require torch -try: - from ._type import ArrayLike, var_info -except ImportError: - ArrayLike = None - var_info = None - -try: - from ._var_info import ArrayLike, var_info -except ImportError: - pass # Already set to None above -from ._wrap import wrap -from ._xml2dict import XmlDictConfig, XmlListConfig, xml2dict - -# Import from misc module -from .misc import connect_nums, float_linspace + import scitex_gen as _real +except ImportError as _e: + raise ImportError( + "scitex.gen requires the 'scitex-gen' package. " + "Install with: pip install scitex[gen] (or: pip install scitex-gen)" + ) from _e -__all__ = [ - "ArrayLike", - "ArrayLike", - "DimHandler", - "Tee", - "TimeStamper", - "XmlDictConfig", - "XmlListConfig", - "alternate_kwarg", - "cache", - "check_host", - "ci", - "clip_perc", - "close", - "connect_nums", - "dir2npy", - "embed", - "float_linspace", - "list_api", - "is_host", - "is_ipython", - "is_script", - "keys2npa", - "less", - "list_packages", - "mat2dict", - "mat2npa", - "mat2npy", - "paste", - "print_config", - "print_config_main", - "public_keys", - "run_shellcommand", - "run_shellscript", - "running2finished", - "save_npa", - "src", - "start", - "symlink", - "symlog", - "tee", - "title2path", - "title_case", - "to_01", - "to_even", - "to_nan01", - "to_nanz", - "to_odd", - "to_rank", - "to_z", - "transpose", - "unbias", - "var_info", - "var_info", - "verify_host", - "wrap", - "xml2dict", - "detect_environment", - "get_output_directory", - "is_notebook", - "get_notebook_path", - "get_notebook_name", - "get_notebook_directory", -] +_sys.modules[__name__] = _real diff --git a/src/scitex/gen/_alternate_kwarg.py b/src/scitex/gen/_alternate_kwarg.py deleted file mode 100755 index 65fedba6..00000000 --- a/src/scitex/gen/_alternate_kwarg.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env 
python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-02 13:30:41 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_alternate_kwarg.py - - -def alternate_kwarg(kwargs, primary_key, alternate_key): - alternate_value = kwargs.pop(alternate_key, None) - kwargs[primary_key] = kwargs.get(primary_key) or alternate_value - return kwargs - - -# EOF diff --git a/src/scitex/gen/_cache.py b/src/scitex/gen/_cache.py deleted file mode 100755 index 298fb17c..00000000 --- a/src/scitex/gen/_cache.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-02 13:30:24 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_cache.py - -from functools import lru_cache - -cache = lru_cache(maxsize=None) - - -# EOF diff --git a/src/scitex/gen/_deprecated_close.py b/src/scitex/gen/_deprecated_close.py deleted file mode 100755 index 9e3ff914..00000000 --- a/src/scitex/gen/_deprecated_close.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-08-22 17:05:52 (ywatanabe)" -# File: /home/ywatanabe/proj/scitex_repo/src/scitex/gen/_deprecated_close.py -# ---------------------------------------- -from __future__ import annotations - -import os - -__FILE__ = __file__ -__DIR__ = os.path.dirname(__FILE__) -# ---------------------------------------- - -""" -Deprecated wrapper for the old scitex.gen.close function. - -This module provides backward compatibility by forwarding calls to the new -scitex.session.close function while showing deprecation warnings. -""" - -from scitex.decorators._deprecated import deprecated - - -@deprecated( - "Use scitex.session.close instead. The old interface will be removed in a future version." -) -def close(*args, **kwargs): - """Deprecated close function - use scitex.session.close instead. - - This function provides backward compatibility for existing code that uses - scitex.gen.close(). 
It forwards all calls to the new scitex.session.close() - function while displaying a deprecation warning. - - Parameters - ---------- - *args : tuple - Positional arguments passed to scitex.session.close() - **kwargs : dict - Keyword arguments passed to scitex.session.close() - - Returns - ------- - Any - Same return value as scitex.session.close() - """ - # Import here to avoid circular dependencies - from scitex.session import close as session_close - - return session_close(*args, **kwargs) - - -@deprecated( - "Use scitex.session.running2finished instead. The old interface will be removed in a future version." -) -def running2finished(*args, **kwargs): - """Deprecated running2finished function - use scitex.session.running2finished instead. - - This function provides backward compatibility for existing code that uses - scitex.gen.running2finished(). It forwards all calls to the new - scitex.session.running2finished() function while displaying a deprecation warning. - - Parameters - ---------- - *args : tuple - Positional arguments passed to scitex.session.running2finished() - **kwargs : dict - Keyword arguments passed to scitex.session.running2finished() - - Returns - ------- - Any - Same return value as scitex.session.running2finished() - """ - # Import here to avoid circular dependencies - from scitex.session import running2finished as session_running2finished - - return session_running2finished(*args, **kwargs) - - -__all__ = ["close", "running2finished"] - -# EOF diff --git a/src/scitex/gen/_deprecated_start.py b/src/scitex/gen/_deprecated_start.py deleted file mode 100755 index c0a3f20c..00000000 --- a/src/scitex/gen/_deprecated_start.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# File: /home/ywatanabe/proj/SciTeX-Code/src/scitex/gen/_deprecated_start.py - -""" -Deprecated wrapper for the old scitex.gen.start function. 
- -This module provides backward compatibility by forwarding calls to the new -scitex.session.start function while showing deprecation warnings. -""" - -from scitex.decorators._deprecated import deprecated - - -@deprecated( - reason="Use scitex.session.start instead. The old interface will be removed in a future version.", - forward_to="..session.start", -) -def start(*args, **kwargs): - """This docstring will be auto-generated by the @deprecated decorator.""" - # This function body will never be executed due to forwarding - # but we need a placeholder implementation - raise NotImplementedError( - "This function should be forwarded by the @deprecated decorator" - ) - - -__all__ = ["start"] diff --git a/src/scitex/gen/_detect_notebook_path.py b/src/scitex/gen/_detect_notebook_path.py deleted file mode 100755 index 0a44524e..00000000 --- a/src/scitex/gen/_detect_notebook_path.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-07-04 11:22:00 (ywatanabe)" -# File: ./src/scitex/gen/_detect_notebook_path.py - -""" -Detect Jupyter notebook filename for consistent output paths. - -When running in a notebook like ./examples/analysis.ipynb, -outputs should go to ./examples/analysis_out/ -""" - -import json -import os -from typing import Optional - -__all__ = ["get_notebook_path", "get_notebook_output_dir"] - - -def get_notebook_path() -> Optional[str]: - """ - Get the path of the currently running Jupyter notebook. 
- - Returns - ------- - Optional[str] - Path to the notebook file, or None if not in a notebook - - Examples - -------- - >>> path = get_notebook_path() - >>> print(path) - ./examples/my_analysis.ipynb - """ - try: - # Method 1: Try to get from IPython - ip = get_ipython() - - # Check if we're in a notebook - if not (ip and type(ip).__name__ == "ZMQInteractiveShell"): - return None - - # Method 2: Check IPython's notebook_name - if hasattr(ip, "notebook_name"): - return ip.notebook_name - - # Method 3: Try to get from kernel connection file - import glob - import re - - # Get kernel ID from the current session - kernel_id = re.search( - r"kernel-(.*?)\.json", ip.config["IPKernelApp"]["connection_file"] - ).group(1) - - # Search for notebook sessions - runtime_dir = os.environ.get( - "JUPYTER_RUNTIME_DIR", os.path.expanduser("~/.local/share/jupyter/runtime") - ) - - for nbserver in glob.glob(os.path.join(runtime_dir, "nbserver-*.json")): - try: - with open(nbserver, "r") as f: - server_info = json.load(f) - - # Check notebook sessions - import requests - - sessions_url = f"{server_info['url']}api/sessions?token={server_info.get('token', '')}" - response = requests.get(sessions_url) - - if response.status_code == 200: - sessions = response.json() - for session in sessions: - if session["kernel"]["id"] == kernel_id: - # Found our notebook! 
- notebook_path = session["notebook"]["path"] - return notebook_path - except: - continue - - # Method 4: Try JavaScript bridge (if available) - try: - import time - - from IPython.display import Javascript, display - - # This won't work in papermill, but works in interactive notebooks - display( - Javascript( - """ - IPython.notebook.kernel.execute( - `__notebook_path__ = '${IPython.notebook.notebook_path}'` - ); - """ - ) - ) - - # Brief pause for JS execution - time.sleep(0.1) - - if "__notebook_path__" in globals(): - return globals()["__notebook_path__"] - except: - pass - - except Exception: - pass - - return None - - -def get_notebook_output_dir(notebook_path: Optional[str] = None) -> Optional[str]: - """ - Get the output directory for a notebook. - - Parameters - ---------- - notebook_path : Optional[str] - Path to notebook, or None to auto-detect - - Returns - ------- - Optional[str] - Output directory path like ./examples/analysis_out/ - - Examples - -------- - >>> output_dir = get_notebook_output_dir() - >>> print(output_dir) - ./examples/analysis_out/ - - >>> output_dir = get_notebook_output_dir('./docs/tutorial.ipynb') - >>> print(output_dir) - ./docs/tutorial_out/ - """ - if notebook_path is None: - notebook_path = get_notebook_path() - - if not notebook_path: - return None - - # Get directory and base name - notebook_dir = os.path.dirname(notebook_path) or "." - notebook_base = os.path.splitext(os.path.basename(notebook_path))[0] - - # Create output directory path - output_dir = os.path.join(notebook_dir, f"{notebook_base}_out") - - return output_dir - - -def detect_notebook_from_cwd() -> Optional[str]: - """ - Fallback: Try to detect notebook from current working directory. - - If there's exactly one .ipynb file in the current directory, - assume that's the running notebook. 
- - Returns - ------- - Optional[str] - Path to notebook or None - """ - import glob - - notebooks = glob.glob("*.ipynb") - - # Only use this method if there's exactly one notebook - if len(notebooks) == 1: - return notebooks[0] - - return None - - -# EOF diff --git a/src/scitex/gen/_embed.py b/src/scitex/gen/_embed.py deleted file mode 100755 index 88dee526..00000000 --- a/src/scitex/gen/_embed.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -This script does XYZ. -""" - -# import os -# import sys - -# import matplotlib.pyplot as plt - -# # Imports -# -# import numpy as np -# import pandas as pd -# import torch -# import torch.nn as nn -# import torch.nn.functional as F - -# # Config -# CONFIG = scitex.gen.load_configs() - -# Functions -# from IPython import embed as _embed -# import pyperclip - -# def embed_with_clipboard_exec(): -# # Try to get text from the clipboard -# try: -# clipboard_content = pyperclip.paste() -# except pyperclip.PyperclipException as e: -# clipboard_content = "" -# print("Could not access the clipboard:", e) - -# # Start IPython session with the clipboard content preloaded -# ipython_shell = embed(header='IPython is now running with the following clipboard content executed:', compile_flags=None) - -# # Optionally, execute the clipboard content automatically -# if clipboard_content: -# # Execute the content as if it was typed in directly -# ipython_shell.run_cell(clipboard_content) - - -def embed(): - import pyperclip - from IPython import embed as _embed - - try: - clipboard_content = pyperclip.paste() - except pyperclip.PyperclipException as e: - clipboard_content = "" - print("Could not access the clipboard:", e) - - print("Clipboard content loaded. Do you want to execute it? [y/n]") - execute_clipboard = input().strip().lower() == "y" - - # Start IPython shell - ipython_shell = _embed( - header="IPython is now running. Clipboard content will be executed if confirmed." 
- ) - - # Execute if confirmed - if clipboard_content and execute_clipboard: - ipython_shell.run_cell(clipboard_content) - - -if __name__ == "__main__": - import sys - - import matplotlib.pyplot as plt - - import scitex - - # Start - CONFIG, sys.stdout, sys.stderr, plt, CC = scitex.session.start(sys, plt) - - embed() - - # Close - scitex.session.close(CONFIG) - -# EOF - -""" -/ssh:ywatanabe@444:/home/ywatanabe/proj/entrance/scitex/gen/_embed.py -""" diff --git a/src/scitex/gen/_is_ipython.py b/src/scitex/gen/_is_ipython.py deleted file mode 100755 index d34e44a7..00000000 --- a/src/scitex/gen/_is_ipython.py +++ /dev/null @@ -1,12 +0,0 @@ -def is_ipython(): - try: - __IPYTHON__ - ipython_mode = True - except NameError: - ipython_mode = False - - return ipython_mode - - -def is_script(): - return not is_ipython() diff --git a/src/scitex/gen/_less.py b/src/scitex/gen/_less.py deleted file mode 100755 index 8c651e5d..00000000 --- a/src/scitex/gen/_less.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-03 02:11:18 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_less.py -#!./env/bin/python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-04-21 12:05:35" -# Author: Yusuke Watanabe (ywatanabe@scitex.ai) - -""" -This script does XYZ. -""" - -import sys - -import matplotlib.pyplot as plt - -import scitex - -# Imports - -# # Config -# CONFIG = scitex.gen.load_configs() - - -# Functions -def less(output): - """ - Print the given output using `less` in an IPython or IPdb session. 
- """ - import os - import tempfile - - from IPython import get_ipython - - # Create a temporary file to hold the output - with tempfile.NamedTemporaryFile(delete=False, mode="w+t") as tmpfile: - # Write the output to the temporary file - tmpfile.write(output) - tmpfile_name = tmpfile.name - - # Use IPython's system command access to pipe the content of the temporary file to `less` - get_ipython().system(f"less {tmpfile_name}") - - # Clean up the temporary file - os.remove(tmpfile_name) - - -# EOF diff --git a/src/scitex/gen/_list_packages.py b/src/scitex/gen/_list_packages.py deleted file mode 100755 index 172233e4..00000000 --- a/src/scitex/gen/_list_packages.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python3 -# Time-stamp: "2024-11-03 02:11:54 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_list_packages.py -""" -Functionality: - * Lists and analyzes installed Python packages and their modules -Input: - * None (uses importlib.metadata to get installed packages) -Output: - * DataFrame containing package module information -Prerequisites: - * importlib.metadata (Python 3.8+) or importlib_metadata, pandas -""" - -import sys -from typing import Optional - -import pandas as pd - -try: - # Python 3.8+ standard library - from importlib.metadata import distributions -except ImportError: - # Fallback for older Python versions - from importlib_metadata import distributions - - -def list_packages( - max_depth: int = 1, - root_only: bool = True, - skip_errors: bool = True, - verbose: bool = False, -) -> pd.DataFrame: - """Lists all installed packages and their modules.""" - sys.setrecursionlimit(10_000) - - # Skip known problematic packages - skip_patterns = [ - "nvidia", - "cuda", - "pillow", - "fonttools", - "ipython", - "jsonschema", - "readme", - "importlib-metadata", - ] - - # Get installed packages, excluding problematic ones - installed_packages = [ - dist.name.replace("-", "_") - for dist in distributions() - if not any(pat in dist.name.lower() for pat 
in skip_patterns) - ] - - # Focus on commonly used packages first - safelist = [ - "numpy", - "pandas", - "scipy", - "matplotlib", - "sklearn", - "torch", - "tensorflow", - "keras", - "xarray", - "dask", - "pytest", - "requests", - "flask", - "django", - "seaborn", - ] - - # Prioritize safelist packages - installed_packages = [pkg for pkg in installed_packages if pkg in safelist] + [ - pkg for pkg in installed_packages if pkg not in safelist - ] - - from scitex.introspect import list_api - - all_dfs = [] - for package_name in installed_packages: - try: - df = list_api( - package_name, - docstring=False, # Speed up by skipping docstrings - print_output=False, - columns=["Name"], - root_only=root_only, - max_depth=max_depth, - skip_depwarnings=True, - ) - if not df.empty: - all_dfs.append(df) - except Exception as err: - if verbose: - print(f"Error processing {package_name}: {err}") - if not skip_errors: - raise - - if not all_dfs: - return pd.DataFrame(columns=["Name"]) - - combined_df = pd.concat(all_dfs, ignore_index=True) - return combined_df.drop_duplicates().sort_values("Name") - - -def main() -> Optional[int]: - """Main function for testing package listing functionality.""" - df = list_packages(verbose=True) - __import__("ipdb").set_trace() - return 0 - - -if __name__ == "__main__": - import matplotlib.pyplot as plt - - import scitex - - CONFIG, sys.stdout, sys.stderr, plt, CC = scitex.session.start( - sys, - plt, - verbose=False, - agg=True, - ) - - exit_status = main() - - scitex.session.close( - CONFIG, - verbose=False, - sys=sys, - notify=False, - message="", - exit_status=exit_status, - ) - -# EOF diff --git a/src/scitex/gen/_mat2py.py b/src/scitex/gen/_mat2py.py deleted file mode 100755 index 828a1646..00000000 --- a/src/scitex/gen/_mat2py.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-03 18:57:14 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_mat2py.py - -"""Helper script for loading .mat 
files into python. -For .mat with multiple variables use mat2dict to get return dictionary with .mat variables. -For .mat with 1 matrix use mat2npa to return np.array -For .mat with 1 matrix use mat2npy to save np.array to .npy -For multiple .mat files with 1 matrix use dir2npy to save 1 np.array of each .mat to .npy - - -Examples: -mat2py.mat2npa(fname = '/vol/ccnlab-scratch1/julber/chill_nn_regression/data/chill_wav_time_16kHz.mat', typ = np.float32) -mat2py.dir2npa(dir = '/vol/ccnlab-scratch1/julber/phoneme_decoding/data/', typ = np.float32, regex = '*xdata') -mat2py.dir2npa(dir = '/vol/ccnlab-scratch1/julber/phoneme_decoding/data/', typ = np.int32, regex = '*ylabels') - - -September 07, 2017 -JB""" - -import os -from glob import glob as _glob - -import h5py -import numpy as np -from scipy.io import loadmat - - -def mat2dict(fname): - """Function returns a dictionary with .mat variables""" - try: - D = h5py.File(fname) - d = {} - for key, value in D.items(): - d[key] = value - d["__hdf__"] = True - except: - d = loadmat(fname) - d["__hdf__"] = False - return d - - -def keys2npa(d, typ): - import pdb - - pdb.set_trace() - d2 = {} - for key in public_keys(d): - x = np.array(d[key], dtype=typ) - if d["__hdf__"]: - x = np.squeeze(np.swapaxes(x, 0, -1)) - assert type(x.flatten()[0]) == typ - d2[key] = x.copy() - return d2 - - -def public_keys(d): - return [k for k in d.keys() if not k.startswith("_")] - - -def mat2npa(fname, typ): - """Function returns np array from 1st entry in .mat file""" - import pdb - - pdb.set_trace() - d = keys2npa(mat2dict(fname), typ) - return d[d.keys()[0]] - - -def save_npa(fname, x): - np.save(fname, x) - - -def mat2npy(fname, typ): - """Function save np array from 1st entry in .mat file to .npy file""" - x = mat2npa(fname, typ) - save_npa(fname=fname.replace(".mat", ""), x=x) - - -def dir2npy(dir, typ, regex="*"): - """Function saves np array from 1st entry in each regex + .mat file in dir""" - os.chdir(dir) - for fname in _glob(regex + 
".mat"): - print("File " + fname + " to" + " .npa") - mat2npy(dir + fname, typ) - - -# EOF diff --git a/src/scitex/gen/_norm.py b/src/scitex/gen/_norm.py deleted file mode 100755 index d05447ff..00000000 --- a/src/scitex/gen/_norm.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-19 01:09:55 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_norm.py - -THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/gen/_norm.py" - -import torch - -from scitex.decorators import torch_fn -from scitex.torch import nanstd - - -@torch_fn -def to_z(x, axis=-1, dim=None, device="cuda"): - """Standardizes tensor to zero mean and unit variance along specified dimension. - - Parameters - ---------- - xx : torch.Tensor - Input tensor - dim : int, optional - Dimension along which to standardize (preferred) - axis : int, optional - Alternative to dim for numpy compatibility - device : str - Device to use for computation - - Returns - ------- - torch.Tensor - Z-scored tensor - """ - return (x - x.mean(dim=dim, keepdim=True)) / x.std(dim=dim, keepdim=True) - - -@torch_fn -def to_nanz(x, axis=-1, dim=None, device="cuda"): - """Standardizes tensor handling NaN values along specified dimension. - - Parameters - ---------- - xx : torch.Tensor - Input tensor - dim : int, optional - Dimension along which to standardize (preferred) - axis : int, optional - Alternative to dim for numpy compatibility - device : str - Device to use for computation - - Returns - ------- - torch.Tensor - Z-scored tensor with NaN handling - """ - nan_mean = torch.nanmean(x, dim=dim, keepdim=True) - nan_std = nanstd(x, dim=dim, keepdim=True) - return (x - nan_mean) / nan_std - - -@torch_fn -def to_01(x, axis=-1, dim=None, device="cuda"): - """Min-max scales tensor to [0, 1] range along specified dimension. 
- - Parameters - ---------- - xx : torch.Tensor - Input tensor - dim : int, optional - Dimension along which to scale (preferred) - axis : int, optional - Alternative to dim for numpy compatibility - device : str - Device to use for computation - - Returns - ------- - torch.Tensor - Min-max scaled tensor - """ - # Use dim if provided, otherwise use axis - dimension = dim if dim is not None else axis - - if dimension is None: - # Scale entire tensor - x_min = x.min() - x_max = x.max() - else: - # Scale along specified dimension - x_min = x.min(dim=dimension, keepdim=True)[0] - x_max = x.max(dim=dimension, keepdim=True)[0] - - # Avoid division by zero - return (x - x_min) / (x_max - x_min + 1e-8) - - -@torch_fn -def to_nan01(x, axis=-1, dim=None, device="cuda"): - """Min-max scales tensor handling NaN values along specified dimension. - - Parameters - ---------- - xx : torch.Tensor - Input tensor - dim : int, optional - Dimension along which to scale (preferred) - axis : int, optional - Alternative to dim for numpy compatibility - device : str - Device to use for computation - - Returns - ------- - torch.Tensor - Min-max scaled tensor with NaN handling - """ - # Use dim if provided, otherwise use axis - dimension = dim if dim is not None else axis - - if dimension is None: - # Scale entire tensor - x_min = torch.nanmin(x) - x_max = torch.nanmax(x) - else: - # Scale along specified dimension - x_min = torch.nanmin(x, dim=dimension, keepdim=True)[0] - x_max = torch.nanmax(x, dim=dimension, keepdim=True)[0] - - # Avoid division by zero - return (x - x_min) / (x_max - x_min + 1e-8) - - -@torch_fn -def unbias(x, axis=-1, dim=None, fn="mean", device="cuda"): - """Removes bias from tensor using specified method along dimension. 
- - Parameters - ---------- - xx : torch.Tensor - Input tensor - dim : int, optional - Dimension along which to unbias (preferred) - axis : int, optional - Alternative to dim for numpy compatibility - fn : str - Method to use for unbiasing ('mean' or 'min') - device : str - Device to use for computation - - Returns - ------- - torch.Tensor - Unbiased tensor - """ - if fn == "mean": - return x - x.mean(dim=dim, keepdims=True) - if fn == "min": - return x - x.min(dim=dim, keepdims=True)[0] - raise ValueError(f"Unsupported unbiasing method: {fn}") - - -@torch_fn -def clip_perc( - x, - lower_perc=2.5, - upper_perc=97.5, - low=None, - high=None, - axis=-1, - dim=None, - device="cuda", -): - """Clips tensor values between specified percentiles along dimension. - - Parameters - ---------- - x : torch.Tensor - Input tensor - lower_perc : float - Lower percentile (0-100) - upper_perc : float - Upper percentile (0-100) - low : float, optional - Alternative name for lower_perc - high : float, optional - Alternative name for upper_perc - dim : int - Dimension along which to compute percentiles (preferred) - axis : int - Alternative to dim for numpy compatibility - device : str - Device to use for computation - - Returns - ------- - torch.Tensor - Clipped tensor - """ - # Handle alternative parameter names - if low is not None: - lower_perc = low - if high is not None: - upper_perc = high - - # Use dim if provided, otherwise use axis - dimension = dim if dim is not None else axis - - lower = torch.quantile(x, lower_perc / 100, dim=dimension, keepdim=True) - upper = torch.quantile(x, upper_perc / 100, dim=dimension, keepdim=True) - return torch.clamp(x, min=lower, max=upper) - - -# EOF diff --git a/src/scitex/gen/_norm_cache.py b/src/scitex/gen/_norm_cache.py deleted file mode 100755 index 4211fde8..00000000 --- a/src/scitex/gen/_norm_cache.py +++ /dev/null @@ -1,282 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2025-07-25 05:15:00" -# File: 
_norm_cache.py - -""" -Cached normalization functions for improved performance on repeated operations. -""" - -import hashlib -import weakref -from functools import lru_cache -from typing import Optional, Tuple, Union - -import numpy as np -import torch - -from scitex.decorators import torch_fn - -# Cache for normalized data -_norm_cache = weakref.WeakValueDictionary() -_cache_metadata = {} -_cache_config = {"enabled": True, "max_size": 64, "verbose": False} - - -def _get_array_key( - x: Union[np.ndarray, torch.Tensor], axis: Optional[int] = None -) -> str: - """ - Generate a unique key for an array based on its content and parameters. - - Parameters - ---------- - x : array-like - Input array - axis : int, optional - Axis parameter - - Returns - ------- - str - Unique hash key - """ - # Create a hash based on array properties - if isinstance(x, torch.Tensor): - # For tensors, use shape, dtype, device, and a sample of values - props = f"torch_{x.shape}_{x.dtype}_{x.device}_{axis}" - # Sample a few values for hash (avoid full array for performance) - if x.numel() > 100: - sample = x.flatten()[:: max(1, x.numel() // 100)][:100] - else: - sample = x.flatten() - props += f"_{sample.sum().item():.6f}_{sample.std().item():.6f}" - else: - # For numpy arrays - props = f"numpy_{x.shape}_{x.dtype}_{axis}" - if x.size > 100: - sample = x.flatten()[:: max(1, x.size // 100)][:100] - else: - sample = x.flatten() - props += f"_{np.sum(sample):.6f}_{np.std(sample):.6f}" - - return hashlib.md5(props.encode()).hexdigest() - - -def _check_cache( - key: str, x: Union[np.ndarray, torch.Tensor] -) -> Optional[Union[np.ndarray, torch.Tensor]]: - """Check if cached result exists and is valid.""" - if not _cache_config["enabled"]: - return None - - if key in _cache_metadata: - # Verify array hasn't changed by checking a few properties - cached_info = _cache_metadata[key] - - if isinstance(x, torch.Tensor): - current_sum = x.sum().item() - current_mean = x.mean().item() - else: - 
current_sum = np.sum(x) - current_mean = np.mean(x) - - # Check if values match (within floating point tolerance) - if ( - abs(cached_info["sum"] - current_sum) < 1e-10 - and abs(cached_info["mean"] - current_mean) < 1e-10 - ): - # Try to get from weak reference cache - if key in _norm_cache: - if _cache_config["verbose"]: - print(f"[Norm Cache HIT] {cached_info['op']}") - return _norm_cache[key] - - return None - - -def _store_cache( - key: str, - x: Union[np.ndarray, torch.Tensor], - result: Union[np.ndarray, torch.Tensor], - op: str, -) -> None: - """Store result in cache.""" - if not _cache_config["enabled"]: - return - - # Store metadata - if isinstance(x, torch.Tensor): - _cache_metadata[key] = { - "sum": x.sum().item(), - "mean": x.mean().item(), - "shape": x.shape, - "op": op, - } - else: - _cache_metadata[key] = { - "sum": np.sum(x), - "mean": np.mean(x), - "shape": x.shape, - "op": op, - } - - # Try to store in weak reference cache - try: - _norm_cache[key] = result - except TypeError: - # Some types can't be weakly referenced - pass - - # Implement size limit - if len(_cache_metadata) > _cache_config["max_size"]: - # Remove oldest entries - oldest = list(_cache_metadata.keys())[0] - del _cache_metadata[oldest] - if oldest in _norm_cache: - del _norm_cache[oldest] - - -# Cached version of to_z -@torch_fn -def to_z_cached(x, axis=-1, dim=None, device="cuda"): - """ - Cached version of z-score normalization. - - Caches results for repeated normalizations of the same data. 
- """ - # Generate cache key - dimension = dim if dim is not None else axis - cache_key = _get_array_key(x, dimension) + "_z" - - # Check cache - cached = _check_cache(cache_key, x) - if cached is not None: - return cached - - # Compute normalization - if isinstance(x, torch.Tensor): - result = (x - x.mean(dim=dimension, keepdim=True)) / x.std( - dim=dimension, keepdim=True - ) - else: - result = (x - np.mean(x, axis=dimension, keepdims=True)) / np.std( - x, axis=dimension, keepdims=True - ) - - # Store in cache - _store_cache(cache_key, x, result, "z-score") - - return result - - -# Cached version of to_01 -@torch_fn -def to_01_cached(x, axis=-1, dim=None, device="cuda"): - """ - Cached version of min-max normalization. - - Caches results for repeated normalizations of the same data. - """ - # Generate cache key - dimension = dim if dim is not None else axis - cache_key = _get_array_key(x, dimension) + "_01" - - # Check cache - cached = _check_cache(cache_key, x) - if cached is not None: - return cached - - # Compute normalization - if isinstance(x, torch.Tensor): - if dimension is None: - x_min = x.min() - x_max = x.max() - else: - x_min = x.min(dim=dimension, keepdim=True)[0] - x_max = x.max(dim=dimension, keepdim=True)[0] - result = (x - x_min) / (x_max - x_min + 1e-8) - else: - if dimension is None: - x_min = np.min(x) - x_max = np.max(x) - else: - x_min = np.min(x, axis=dimension, keepdims=True) - x_max = np.max(x, axis=dimension, keepdims=True) - result = (x - x_min) / (x_max - x_min + 1e-8) - - # Store in cache - _store_cache(cache_key, x, result, "min-max") - - return result - - -def configure_norm_cache( - enabled: Optional[bool] = None, - max_size: Optional[int] = None, - verbose: Optional[bool] = None, -) -> None: - """ - Configure normalization cache settings. 
- - Parameters - ---------- - enabled : bool, optional - Enable or disable caching - max_size : int, optional - Maximum number of arrays to cache - verbose : bool, optional - Enable verbose output - """ - if enabled is not None: - _cache_config["enabled"] = enabled - if max_size is not None: - _cache_config["max_size"] = max_size - if verbose is not None: - _cache_config["verbose"] = verbose - - -def clear_norm_cache() -> None: - """Clear all cached normalization results.""" - _norm_cache.clear() - _cache_metadata.clear() - - -def get_norm_cache_info() -> dict: - """Get information about the normalization cache.""" - return { - "enabled": _cache_config["enabled"], - "max_size": _cache_config["max_size"], - "current_size": len(_cache_metadata), - "operations": [v["op"] for v in _cache_metadata.values()], - } - - -# Monkey patch the original functions if enabled -def patch_normalization_functions(): - """Replace original normalization functions with cached versions.""" - import scitex.gen._norm as norm_module - - # Store originals - norm_module.to_z_original = norm_module.to_z - norm_module.to_01_original = norm_module.to_01 - - # Replace with cached versions - norm_module.to_z = to_z_cached - norm_module.to_01 = to_01_cached - - # Also patch in the gen module namespace - try: - import scitex.gen as gen_module - - gen_module.to_z = to_z_cached - gen_module.to_01 = to_01_cached - except: - pass - - -# Auto-patch if enabled -import os - -if os.getenv("SCITEX_CACHE_NORM", "true").lower() == "true": - patch_normalization_functions() diff --git a/src/scitex/gen/_paste.py b/src/scitex/gen/_paste.py deleted file mode 100755 index 2ffeabaf..00000000 --- a/src/scitex/gen/_paste.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-03 02:13:54 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_paste.py -def paste(): - import textwrap - - import pyperclip - - try: - clipboard_content = pyperclip.paste() - clipboard_content = 
textwrap.dedent(clipboard_content) - exec(clipboard_content) - except Exception as e: - print(f"Could not execute clipboard content: {e}") - - -# EOF diff --git a/src/scitex/gen/_print_config.py b/src/scitex/gen/_print_config.py deleted file mode 100755 index ca4b264a..00000000 --- a/src/scitex/gen/_print_config.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-10-13 18:53:04 (ywatanabe)" -# /home/yusukew/proj/scitex_repo/src/scitex/gen/_print_config.py - -""" -1. Functionality: - - Prints configuration values from YAML files -2. Input: - - Configuration key (dot-separated for nested structures) -3. Output: - - Corresponding configuration value -4. Prerequisites: - - scitex package with load_configs function - -Example: - python _print_config.py PATH.TITAN.MAT -""" - -import argparse -import os -import sys -from pprint import pprint - - -def print_config(key): - CONFIG = scitex.io.load_configs() - - if key is None: - print("Available configurations:") - pprint(CONFIG) - return - - try: - keys = key.split(".") - value = CONFIG - for k in keys: - if isinstance(value, (dict, scitex.gen.utils._DotDict.DotDict)): - value = value.get(k) - - elif isinstance(value, list): - try: - value = value[int(k)] - except (ValueError, IndexError): - value = None - - elif isinstance(value, str): - break - - else: - value = None - - if value is None: - break - - print(value) - - except Exception as e: - print(f"Error: {e}") - print("Available configurations:") - pprint(value) - - -def print_config_main(args=None): - if args is None: - args = sys.argv[1:] - - parser = argparse.ArgumentParser(description="Print configuration values") - parser.add_argument( - "key", - nargs="?", - default=None, - help="Configuration key (dot-separated for nested structures)", - ) - parsed_args = parser.parse_args(args) - print_config(parsed_args.key) - - -if __name__ == "__main__": - print_config_main() diff --git a/src/scitex/gen/_skills/SKILL.md 
b/src/scitex/gen/_skills/SKILL.md deleted file mode 100644 index db574c61..00000000 --- a/src/scitex/gen/_skills/SKILL.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -name: stx.gen -description: General utilities collection in scitex. A legacy module with backward-compatible re-exports. Contains active implementations for tensor normalization, time profiling, dimension manipulation, numeric helpers, data inspection, caching, XML/MATLAB I/O, and interactive tools. Many functions have moved to purpose-specific modules. -user-invocable: false ---- - -# stx.gen — General Utilities - -`stx.gen` is a legacy utility collection. **For new code, import from the specific modules listed in each sub-skill.** Backward-compatible re-exports remain here with deprecation warnings where applicable. - -```python -import scitex as stx - -# All stx.gen functions are accessible as: -stx.gen.(...) -``` - ---- - -## Sub-skills - -### Active Implementations - -- [tensor-normalization.md](tensor-normalization.md) — `to_z`, `to_nanz`, `to_01`, `to_nan01`, `unbias`, `clip_perc`; optional caching layer (`_norm_cache`). Requires `torch`. - -- [timestamper.md](timestamper.md) — `TimeStamper` class: callable profiler that records labeled checkpoints with elapsed time in a pandas DataFrame. - -- [dim-handler.md](dim-handler.md) — `DimHandler` class: flatten non-target dimensions into a batch axis, compute, then restore original shape. Supports `torch.Tensor` and `numpy.ndarray`. Requires `torch`. - -- [numeric-utils.md](numeric-utils.md) — `to_even`, `to_odd`, `to_rank`, `symlog`, `transpose`, `connect_nums`, `float_linspace`. - -- [data-inspection.md](data-inspection.md) — `var_info`, `ArrayLike` type alias, `describe` (summary statistics). - -- [environment-detection.md](environment-detection.md) — `is_ipython`, `is_script`, `list_packages`; context re-exports (`is_notebook`, `detect_environment`, etc.). 
- -- [caching-decorators.md](caching-decorators.md) — `cache` (lru_cache alias), `alternate_kwarg` (multi-name kwarg support), `wrap` (functools.wraps pass-through). - -- [xml-matlab.md](xml-matlab.md) — `xml2dict`, `XmlDictConfig`, `XmlListConfig`; `mat2dict`, `public_keys`, `save_npa`, `mat2npy`, `dir2npy`. - -- [interactive-tools.md](interactive-tools.md) — `less` (pager), `src` (source viewer), `paste` (clipboard exec), `embed` (IPython shell), `symlink`, `title2path`. - -### Migration Reference - -- [deprecated.md](deprecated.md) — Complete table of what moved where: `start`/`close`/`running2finished` → `stx.session`; `ci` → `scitex_stats`; `check_host` → `stx.os`; `run_shellcommand` → `stx.sh`; `list_api` → `stx.introspect`; context functions → `stx.context`. - ---- - -## Quick reference - -| Function / Class | Sub-skill | Requires | -|-----------------|-----------|---------| -| `TimeStamper` | [timestamper.md](timestamper.md) | pandas | -| `DimHandler` | [dim-handler.md](dim-handler.md) | torch | -| `to_z`, `to_01`, `clip_perc`, ... 
| [tensor-normalization.md](tensor-normalization.md) | torch | -| `to_even`, `to_odd`, `symlog` | [numeric-utils.md](numeric-utils.md) | numpy | -| `to_rank` | [numeric-utils.md](numeric-utils.md) | torch | -| `transpose` | [numeric-utils.md](numeric-utils.md) | numpy | -| `connect_nums`, `float_linspace` | [numeric-utils.md](numeric-utils.md) | numpy | -| `var_info`, `ArrayLike` | [data-inspection.md](data-inspection.md) | torch, xarray | -| `describe` | [data-inspection.md](data-inspection.md) | pandas, numpy | -| `is_ipython`, `is_script` | [environment-detection.md](environment-detection.md) | — | -| `list_packages` | [environment-detection.md](environment-detection.md) | pandas | -| `cache` | [caching-decorators.md](caching-decorators.md) | — | -| `alternate_kwarg` | [caching-decorators.md](caching-decorators.md) | — | -| `wrap` | [caching-decorators.md](caching-decorators.md) | — | -| `xml2dict`, `XmlDictConfig` | [xml-matlab.md](xml-matlab.md) | — | -| `mat2dict`, `dir2npy` | [xml-matlab.md](xml-matlab.md) | h5py, scipy | -| `less`, `src`, `paste` | [interactive-tools.md](interactive-tools.md) | IPython / pyperclip | -| `embed` | [interactive-tools.md](interactive-tools.md) | IPython, pyperclip, torch | -| `symlink` | [interactive-tools.md](interactive-tools.md) | — | -| `title2path` | [interactive-tools.md](interactive-tools.md) | — | -| `start`, `close` | [deprecated.md](deprecated.md) | use `@stx.session` | -| `ci` | [deprecated.md](deprecated.md) | use `scitex_stats.descriptive.ci` | -| `check_host`, `is_host` | [deprecated.md](deprecated.md) | use `stx.os` | -| `run_shellcommand` | [deprecated.md](deprecated.md) | use `stx.sh` | diff --git a/src/scitex/gen/_skills/caching-decorators.md b/src/scitex/gen/_skills/caching-decorators.md deleted file mode 100644 index 5c2c1540..00000000 --- a/src/scitex/gen/_skills/caching-decorators.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -description: Decorator and kwarg utilities in stx.gen — cache (lru_cache alias), 
alternate_kwarg for accepting multiple keyword names, and wrap (functools.wraps pass-through). ---- - -# Caching and Decorator Utilities - ---- - -## cache - -An alias for `functools.lru_cache(maxsize=None)` — an unbounded memoization decorator. - -```python -cache = lru_cache(maxsize=None) -``` - -```python -import scitex as stx - -@stx.gen.cache -def expensive_fn(n: int) -> int: - return sum(range(n)) - -expensive_fn(1_000_000) # computed -expensive_fn(1_000_000) # cached — instant -``` - -Because `maxsize=None`, the cache grows without bound. For bounded caches, use `functools.lru_cache(maxsize=N)` directly. - -> **Note:** Cached functions cannot have mutable (unhashable) arguments (e.g., lists, dicts, numpy arrays). - ---- - -## alternate_kwarg - -Allows a function to accept two different keyword argument names for the same parameter. If the primary key is absent or falsy, the alternate key's value is used. - -```python -alternate_kwarg(kwargs: dict, primary_key: str, alternate_key: str) -> dict -``` - -| Parameter | Description | -|-----------|-------------| -| `kwargs` | The `**kwargs` dict to mutate | -| `primary_key` | The canonical/preferred keyword name | -| `alternate_key` | The legacy/alternative keyword name | - -The `alternate_key` is **popped** from `kwargs`. If `primary_key` is already set (and truthy), `alternate_key` is discarded. If `primary_key` is absent/falsy, it is set to the `alternate_key` value. 
- -```python -import scitex as stx - -def my_func(**kwargs): - kwargs = stx.gen.alternate_kwarg(kwargs, "learning_rate", "lr") - lr = kwargs.get("learning_rate") - return lr - -my_func(learning_rate=0.01) # 0.01 -my_func(lr=0.001) # 0.001 — "lr" is mapped to "learning_rate" -my_func(learning_rate=0.01, lr=0.001) # 0.01 — primary wins -``` - -**Typical pattern when wrapping a library function:** - -```python -def fit(X, y, **kwargs): - kwargs = stx.gen.alternate_kwarg(kwargs, "n_estimators", "n_trees") - kwargs = stx.gen.alternate_kwarg(kwargs, "random_state", "seed") - return sklearn_model.fit(X, y, **kwargs) -``` - ---- - -## wrap - -A minimal `functools.wraps`-based decorator factory that preserves the wrapped function's metadata. - -```python -wrap(func: callable) -> callable -``` - -Returns a wrapper that calls `func(*args, **kwargs)` and preserves `__name__`, `__doc__`, etc. via `@functools.wraps(func)`. - -```python -import scitex as stx - -@stx.gen.wrap -def add(a, b): - """Add two numbers.""" - return a + b - -add.__name__ # "add" -add.__doc__ # "Add two numbers." -add(1, 2) # 3 -``` - -This is intentionally minimal — use it when you want to add decoration infrastructure without changing behavior yet. diff --git a/src/scitex/gen/_skills/data-inspection.md b/src/scitex/gen/_skills/data-inspection.md deleted file mode 100644 index a2d4640e..00000000 --- a/src/scitex/gen/_skills/data-inspection.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -description: Variable inspection utilities in stx.gen — var_info for type/shape introspection, the ArrayLike type alias, and describe for summary statistics on DataFrames. ---- - -# Data Inspection - ---- - -## var_info - -Returns a dictionary with type and structural information about any variable. - -```python -var_info(variable: Any) -> dict -``` - -> **Note:** Requires `torch`, `xarray`. Returns `None` when torch is not installed (imported from `_type.py` or `_var_info.py`). 
- -**Returned keys** (depending on input type): - -| Key | Condition | Description | -|-----|-----------|-------------| -| `"type"` | always | `type(variable).__name__` | -| `"length"` | `hasattr(variable, "__len__")` | `len(variable)` | -| `"shape"` | ndarray, DataFrame, Series, DataArray, Tensor | `.shape` tuple | -| `"dimensions"` | same as above, or nested list | number of dimensions | - -For nested lists, the shape and depth are inferred by traversing `variable[0]` recursively. - -```python -import numpy as np -import scitex as stx - -data = np.array([[1, 2], [3, 4]]) -stx.gen.var_info(data) -# {'type': 'ndarray', 'length': 2, 'shape': (2, 2), 'dimensions': 2} - -stx.gen.var_info(42) -# {'type': 'int'} - -stx.gen.var_info([1, 2, 3]) -# {'type': 'list', 'length': 3} - -stx.gen.var_info([[1, 2], [3, 4]]) -# {'type': 'list', 'length': 2, 'shape': (2, 2), 'dimensions': 2} -``` - ---- - -## ArrayLike - -A `typing.Union` type alias grouping all common array-like types. - -```python -from scitex.gen import ArrayLike - -# Equivalent to: -# Union[list, tuple, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray, torch.Tensor] -``` - -Use in function annotations to accept any numeric container: - -```python -def process(data: ArrayLike) -> np.ndarray: - ... -``` - -> **Note:** Defined in both `_type.py` and `_var_info.py`; the import in `__init__.py` tries `_type.py` first, then `_var_info.py`. - ---- - -## describe - -Computes summary statistics for a DataFrame or array. 
- -```python -describe(df, method="mean_std", round_factor=3, axis=0) -> dict -``` - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `df` | required | `pd.DataFrame` or array-like (coerced with `pd.DataFrame(df)`) | -| `method` | `"mean_std"` | One of `"mean_std"`, `"mean_ci"`, `"median_iqr"` | -| `round_factor` | `3` | Decimal places for rounding | -| `axis` | `0` | Axis along which to compute | - -**Return value by method:** - -| `method` | Keys returned | -|----------|--------------| -| `"mean_std"` | `{"n", "mean", "std"}` | -| `"mean_ci"` | `{"n", "mean", "ci"}` where `ci = 1.96 * std / sqrt(n)` | -| `"median_iqr"` | `{"n", "median", "iqr"}` | - -NaN values are silently excluded from computations (`nanmean`, `nanstd`, `notna().sum()`). - -```python -import pandas as pd -import scitex as stx - -data = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [10, 20, 30, 40, 50]}) - -stx.gen.describe(data, method="mean_std") -# {'n': A 5\nB 5, 'mean': A 3.0\nB 30.0, 'std': ...} - -stx.gen.describe(data, method="mean_ci") -# {'n': ..., 'mean': ..., 'ci': ...} - -stx.gen.describe(data, method="median_iqr") -# {'n': ..., 'median': ..., 'iqr': ...} -``` - -> **Note:** `describe` is in `misc.py` but not currently listed in `__all__`. Access via `stx.gen.describe` or import directly from `scitex.gen.misc`. diff --git a/src/scitex/gen/_skills/deprecated.md b/src/scitex/gen/_skills/deprecated.md deleted file mode 100644 index 1ca1b0d6..00000000 --- a/src/scitex/gen/_skills/deprecated.md +++ /dev/null @@ -1,139 +0,0 @@ ---- -description: Deprecated functions and re-exports in stx.gen — start/close/running2finished (now in stx.session), host utilities (now in stx.os), shell commands (now in stx.sh), and other relocated functions. All trigger DeprecationWarning. ---- - -# Deprecated Functions and Re-exports - -`stx.gen` is a legacy module. Most of its functions have been relocated to purpose-specific modules. 
This page documents what moved where and the migration path. - ---- - -## Session lifecycle (DEPRECATED) - -These forward to `scitex.session` and emit `DeprecationWarning` via `@deprecated`. - -| Old name | Replacement | Notes | -|----------|-------------|-------| -| `stx.gen.start(...)` | `stx.session.start(...)` | Old session initializer | -| `stx.gen.close(...)` | `stx.session.close(...)` | Old session finalizer | -| `stx.gen.running2finished(...)` | `stx.session.running2finished(...)` | Moves session output dirs | - -```python -# Old code (still works, raises DeprecationWarning) -CONFIG, sys.stdout, sys.stderr, plt, CC = stx.gen.start(sys, plt) -stx.gen.close(CONFIG) - -# Recommended -@stx.session -def main(CONFIG=stx.INJECTED, plt=stx.INJECTED, logger=stx.INJECTED): - ... -``` - ---- - -## Host utilities (moved to stx.os) - -Re-exported from `scitex.os` — **no deprecation warning currently**, but prefer `stx.os.*` in new code. - -| Name | Preferred location | -|------|--------------------| -| `stx.gen.check_host(name)` | `stx.os.check_host` | -| `stx.gen.is_host(name)` | `stx.os.is_host` | -| `stx.gen.verify_host(name)` | `stx.os.verify_host` | - ---- - -## Shell commands (moved to stx.sh) - -Re-exported from `scitex.sh` — no deprecation warning, but prefer `stx.sh.*`. 
- -| Name | Preferred location | -|------|--------------------| -| `stx.gen.run_shellcommand(cmd)` | `stx.sh.run_shellcommand` | -| `stx.gen.run_shellscript(path)` | `stx.sh.run_shellscript` | - ---- - -## Statistics (moved to scitex_stats) - -| Name | Preferred location | -|------|--------------------| -| `stx.gen.ci(data, alpha=0.05)` | `scitex_stats.descriptive.ci` | - -```python -# Old -ci = stx.gen.ci(data, alpha=0.05) - -# Preferred -from scitex_stats.descriptive import ci -ci = ci(data, alpha=0.05) -``` - ---- - -## Introspection (moved to stx.introspect) - -| Name | Preferred location | -|------|--------------------| -| `stx.gen.list_api(pkg)` | `stx.introspect.list_api` | - ---- - -## Environment / context (moved to stx.context) - -Re-exported from `scitex.context` for backward compatibility. - -| Name | Preferred location | -|------|--------------------| -| `stx.gen.detect_environment()` | `stx.context.detect_environment` | -| `stx.gen.is_notebook()` | `stx.context.is_notebook` | -| `stx.gen.get_notebook_path()` | `stx.context.get_notebook_path` | -| `stx.gen.get_notebook_name()` | `stx.context.get_notebook_name` | -| `stx.gen.get_notebook_directory()` | `stx.context.get_notebook_directory` | -| `stx.gen.get_output_directory()` | `stx.context.get_output_directory` | - -See also [environment-detection.md](environment-detection.md) for `is_ipython` and `is_script`, which remain native to `stx.gen`. - ---- - -## String utilities (moved to stx.str) - -| Name | Preferred location | -|------|--------------------| -| `stx.gen.title_case(s)` | `stx.str.title_case` | - ---- - -## Path utilities (moved to stx.path) - -| Name | Preferred location | -|------|--------------------| -| `stx.gen.symlink(tgt, src)` | Lives in `stx.gen._symlink` (not yet moved); also documented in [interactive-tools.md](interactive-tools.md) | - ---- - -## Optional (require torch) - -These are set to `None` at import time if `torch` is not installed. No deprecation warning. 
- -| Name | Notes | -|------|-------| -| `stx.gen.DimHandler` | See [dim-handler.md](dim-handler.md) | -| `stx.gen.embed` | IPython embed with clipboard | -| `stx.gen.to_z`, `to_nanz`, `to_01`, `to_nan01`, `unbias`, `clip_perc` | See [tensor-normalization.md](tensor-normalization.md) | -| `stx.gen.to_rank` | See [numeric-utils.md](numeric-utils.md) | -| `stx.gen.ArrayLike`, `var_info` | See [data-inspection.md](data-inspection.md) | - ---- - -## Summary: where things moved - -``` -stx.gen.start / close → @stx.session decorator -stx.gen.ci → scitex_stats.descriptive.ci -stx.gen.check_host → stx.os -stx.gen.run_shellcommand → stx.sh -stx.gen.list_api → stx.introspect -stx.gen.detect_environment / is_notebook / ... → stx.context -stx.gen.title_case → stx.str -``` diff --git a/src/scitex/gen/_skills/dim-handler.md b/src/scitex/gen/_skills/dim-handler.md deleted file mode 100644 index 7468b70f..00000000 --- a/src/scitex/gen/_skills/dim-handler.md +++ /dev/null @@ -1,132 +0,0 @@ ---- -description: DimHandler class in stx.gen — flattens non-target dimensions into a single batch dimension, performs computations on selected dimensions, then restores the original shape. Works with both torch.Tensor and numpy.ndarray. ---- - -# DimHandler - -`DimHandler` lets you operate on arbitrary selected dimensions of a multi-dimensional tensor or array without writing manual reshape/permute code. It stores the geometry from `fit()` and uses it to reverse the transformation in `unfit()`. - -> **Note:** Requires `torch`. `stx.gen.DimHandler` is `None` when torch is not installed. - -```python -from scitex.gen import DimHandler -``` - ---- - -## How it works - -Given an input with shape `(d0, d1, d2, d3, d4, d5)` and `keepdims=[0, 2, 5]`: - -1. `fit()` permutes to `(non-kept dims..., kept dims...)` → `(d1, d3, d4, d0, d2, d5)` -2. Flattens the leading non-kept axes → `(d1*d3*d4, d0, d2, d5)` — i.e. `(40, 1, 3, 6)` for `(1,2,3,4,5,6)` shaped input -3. 
You perform your computation on the kept-dim axes -4. `unfit()` restores the first (batch) axis to its original shape `(d1, d3, d4, ...)` - ---- - -## Constructor - -```python -DimHandler() -``` - -No parameters. State is stored after calling `fit()`. - ---- - -## fit - -```python -dh.fit(x, keepdims=[]) -> tensor / array -``` - -Reshapes `x` by flattening all dimensions **not** in `keepdims` into the leading batch dimension. The kept dimensions are moved to the trailing axes in their original relative order. - -| Parameter | Description | -|-----------|-------------| -| `x` | `torch.Tensor` or `numpy.ndarray` | -| `keepdims` | List of dimension indices to keep. Negative indices supported. | - -Returns the reshaped array. Also records `shape_fit`, `n_non_keepdims`, `n_keepdims` on the handler. - ---- - -## unfit - -```python -dh.unfit(y) -> tensor / array -``` - -Restores the first (batch) dimension back to the original non-kept dimensions. The trailing dimensions of `y` may differ from the original (e.g. after a reduction), and `unfit` handles this correctly. - -| Parameter | Description | -|-----------|-------------| -| `y` | Output after your computation. Must have the same batch size as returned by `fit`. 
| - ---- - -## Examples - -### Example 1 — lossless round-trip - -```python -import torch -from scitex.gen import DimHandler - -dh = DimHandler() -x = torch.rand(1, 2, 3, 4, 5, 6) -print(x.shape) # torch.Size([1, 2, 3, 4, 5, 6]) - -x_fit = dh.fit(x, keepdims=[0, 2, 5]) -print(x_fit.shape) # torch.Size([40, 1, 3, 6]) -# 40 = 2*4*5 (the non-kept dims) - -x_restored = dh.unfit(x_fit) -print(x_restored.shape) # torch.Size([2, 4, 5, 1, 3, 6]) -# Note: original dim order is not restored; non-kept dims come first -``` - -### Example 2 — computation that reduces a kept dimension - -```python -dh = DimHandler() -x = torch.rand(1, 2, 3, 4, 5, 6) - -x_fit = dh.fit(x, keepdims=[0, 2, 5]) -print(x_fit.shape) # torch.Size([40, 1, 3, 6]) - -# Reduce over dim=-2 (the "3" kept dimension) -y = x_fit.mean(axis=-2) -print(y.shape) # torch.Size([40, 1, 6]) - -y_restored = dh.unfit(y) -print(y_restored.shape) # torch.Size([2, 4, 5, 1, 6]) -``` - -### Example 3 — numpy array - -```python -import numpy as np -from scitex.gen import DimHandler - -dh = DimHandler() -x = np.random.rand(2, 3, 4) - -x_fit = dh.fit(x, keepdims=[1]) -print(x_fit.shape) # (8, 3) — 2*4 batch, 3 kept - -result = x_fit.sum(axis=-1, keepdims=True) -print(result.shape) # (8, 1) - -restored = dh.unfit(result) -print(restored.shape) # (2, 4, 1) -``` - ---- - -## Notes - -- `DimHandler` is **stateful**: a single instance should only be used for one `fit`/`unfit` pair at a time. Create a new instance for each independent operation. -- Negative keepdim indices are normalized before processing. -- The restored shape places non-kept dimensions first; the original permutation order is **not** restored. 
diff --git a/src/scitex/gen/_skills/environment-detection.md b/src/scitex/gen/_skills/environment-detection.md deleted file mode 100644 index 2b3a43c2..00000000 --- a/src/scitex/gen/_skills/environment-detection.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -description: Runtime environment detection in stx.gen — IPython/script detection (is_ipython, is_script), installed package listing (list_packages). Re-exports from stx.context are also available under stx.gen for backward compatibility. ---- - -# Environment Detection - ---- - -## is_ipython - -Returns `True` when running inside an IPython session (Jupyter notebook, IPython shell, IPdb). - -```python -is_ipython() -> bool -``` - -Implementation checks for the existence of `__IPYTHON__` in the global namespace. - -```python -import scitex as stx - -if stx.gen.is_ipython(): - stx.gen.less(long_output) # page with `less` in IPython -else: - print(long_output) -``` - ---- - -## is_script - -Returns `True` when running as a plain Python script (not inside IPython). - -```python -is_script() -> bool -``` - -Equivalent to `not is_ipython()`. - -```python -if stx.gen.is_script(): - # running as a regular script - import argparse - ... -``` - ---- - -## list_packages - -Lists all installed Python packages and their importable modules. Uses `importlib.metadata` internally. - -```python -list_packages( - max_depth: int = 1, - root_only: bool = True, - skip_errors: bool = True, - verbose: bool = False, -) -> pd.DataFrame -``` - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `max_depth` | `1` | Depth of module traversal per package | -| `root_only` | `True` | Only list root-level modules (faster) | -| `skip_errors` | `True` | Silently skip packages that error on import | -| `verbose` | `False` | Print errors for failed packages | - -Returns a `pd.DataFrame` with a `"Name"` column. Common packages (numpy, pandas, torch, etc.) are listed first. 
Known problematic packages (`nvidia`, `cuda`, `pillow`, etc.) are skipped. - -```python -import scitex as stx - -df = stx.gen.list_packages(root_only=True) -print(df.head(10)) -# Name -# 0 numpy.array -# 1 numpy.mean -# ... -``` - -Delegates to `scitex.introspect.list_api` per package. - ---- - -## Context re-exports (backward compatibility) - -The following functions are re-exported from `scitex.context` for backward compatibility. **Prefer using `stx.context.*` directly in new code.** - -| Name | Preferred location | -|------|--------------------| -| `detect_environment()` | `stx.context.detect_environment` | -| `is_notebook()` | `stx.context.is_notebook` | -| `get_notebook_path()` | `stx.context.get_notebook_path` | -| `get_notebook_name()` | `stx.context.get_notebook_name` | -| `get_notebook_directory()` | `stx.context.get_notebook_directory` | -| `get_notebook_info_simple()` | `stx.context.get_notebook_info_simple` | -| `get_output_directory()` | `stx.context.get_output_directory` | - -```python -# Old code (still works, triggers no warning currently) -stx.gen.is_notebook() - -# Preferred -stx.context.is_notebook() -``` diff --git a/src/scitex/gen/_skills/interactive-tools.md b/src/scitex/gen/_skills/interactive-tools.md deleted file mode 100644 index 7e444641..00000000 --- a/src/scitex/gen/_skills/interactive-tools.md +++ /dev/null @@ -1,165 +0,0 @@ ---- -description: Interactive and filesystem utilities in stx.gen — less (pager), src (source viewer), paste (clipboard exec), embed (IPython shell), symlink, and title2path (string-to-path conversion). ---- - -# Interactive and Filesystem Tools - ---- - -## less - -Displays long text using the system `less` pager from within an IPython session. - -```python -less(output: str) -> None -``` - -Writes `output` to a temporary file and opens it with `get_ipython().system("less ")`. Cleans up the temporary file afterwards. - -> **Requires:** IPython / Jupyter environment. 
Will raise if called outside an IPython session. - -```python -import scitex as stx - -if stx.gen.is_ipython(): - stx.gen.less(long_string) -``` - ---- - -## src - -Displays the source code of any Python object using the system `less` pager. - -```python -src(obj: Any) -> None -``` - -If `obj` is a class instance (not a class, function, or method), it inspects `obj.__class__` automatically. Pipes the source code returned by `inspect.getsource(obj)` to a `less` subprocess. - -```python -import scitex as stx - -stx.gen.src(stx.gen.TimeStamper) # Shows TimeStamper source -stx.gen.src(stx.gen.to_z) # Shows to_z source - -ts = stx.gen.TimeStamper() -stx.gen.src(ts) # Also works with instances -``` - -Handles errors gracefully: -- `OSError` when source is unavailable (compiled extensions) -- `TypeError` for unsupported object types -- Prints error message without raising - ---- - -## paste - -Executes the current clipboard content as Python code in the calling scope. - -```python -paste() -> None -``` - -Uses `pyperclip` to read the clipboard and `textwrap.dedent` to strip leading indentation, then calls `exec()`. Prints an error message without raising if clipboard access fails or the code errors. - -> **Requires:** `pyperclip` - -```python -# Copy Python code to clipboard first, then: -stx.gen.paste() -``` - -**Interactive use:** Useful in IPython sessions to run code copied from documentation or a browser. - ---- - -## embed - -Opens an IPython shell with optional clipboard content execution. - -```python -embed() -> None -``` - -1. Reads clipboard via `pyperclip` -2. Asks interactively whether to execute the clipboard content (`y/n`) -3. Starts an IPython shell via `IPython.embed` -4. If confirmed, executes the clipboard content in the IPython session - -> **Requires:** `IPython`, `pyperclip`. Available only when torch is installed (wrapped in try/except in `__init__.py`). 
- -```python -stx.gen.embed() -# Opens IPython — press Ctrl-D to exit -``` - ---- - -## symlink - -Creates a symbolic link using a relative path. - -```python -symlink(tgt: str, src: str, force: bool = False) -> None -``` - -| Parameter | Description | -|-----------|-------------| -| `tgt` | Target (the file/directory to link to) | -| `src` | Source (the path where the symlink is created) | -| `force` | If `True`, remove an existing file at `src` before creating the symlink | - -The symlink is created as a **relative** path (computed from `src`'s directory to `tgt`), so it remains valid if the directory tree is moved. - -Prints a yellow-colored confirmation message on success. - -```python -import scitex as stx - -stx.gen.symlink( - tgt="/data/raw/session_001.mat", - src="/project/data/session_001.mat", -) -# Symlink was created: /project/data/session_001.mat -> ../../data/raw/session_001.mat - -# Overwrite an existing symlink: -stx.gen.symlink(tgt="/data/raw/v2.mat", src="/project/data/session_001.mat", force=True) -``` - ---- - -## title2path - -Converts a title string (or dict) to a filesystem-safe, lowercase path component. - -```python -title2path(title: str | dict) -> str -``` - -Transformations applied in order: -1. If `title` is a dict, converts to string via `scitex.dict.to_str` -2. Removes characters: `:`, `;`, `=`, `[`, `]` -3. Replaces `_-_` with `-` -4. Replaces spaces with `_` -5. Collapses consecutive `__` to `_` -6. Lowercases the result - -```python -import scitex as stx - -stx.gen.title2path("Subject 03: EEG [Alpha Band]") -# "subject_03_eeg_alpha_band" - -stx.gen.title2path("session_1_-_run_2") -# "session_1-run_2" -``` - -**Use case:** Generate consistent output directory names from plot titles or experiment labels. 
- -```python -title = "Condition A vs B: p=0.001" -out_dir = f"./results/{stx.gen.title2path(title)}/" -# "./results/condition_a_vs_b_p0.001/" -``` diff --git a/src/scitex/gen/_skills/numeric-utils.md b/src/scitex/gen/_skills/numeric-utils.md deleted file mode 100644 index 0d28954b..00000000 --- a/src/scitex/gen/_skills/numeric-utils.md +++ /dev/null @@ -1,196 +0,0 @@ ---- -description: Numeric helper functions in stx.gen — rounding to nearest even/odd integer, tensor rank conversion, symmetric log transform, named-dimension transpose, and numeric sequence helpers (connect_nums, float_linspace). ---- - -# Numeric Utilities - -Small but precise helpers for numeric type coercion, transforms, and array operations. - ---- - -## to_even - -Converts any real number to the nearest even integer **less than or equal** to itself (floor-then-even). - -```python -to_even(n: int | float) -> int -``` - -| Input | Output | -|-------|--------| -| `5` | `4` | -| `6` | `6` | -| `3.7` | `2` | -| `-2.3` | `-4` | -| `-0.1` | `-2` | - -Raises `ValueError` for `NaN`, `OverflowError` for `±inf`, `TypeError` for strings. - -```python -import scitex as stx - -stx.gen.to_even(101) # 100 -stx.gen.to_even(200) # 200 -stx.gen.to_even(7.9) # 6 -``` - -**Common use case:** Ensure an FFT window length is even. - -```python -n_fft = stx.gen.to_even(int(fs * 0.025)) # 25 ms window, forced even -``` - ---- - -## to_odd - -Converts any real number to the nearest odd integer **less than or equal** to itself. - -```python -to_odd(n: int | float) -> int -``` - -| Input | Output | -|-------|--------| -| `6` | `5` | -| `7` | `7` | -| `5.8` | `5` | - -```python -kernel_size = stx.gen.to_odd(int(fs * 0.010)) # 10 ms kernel, forced odd -``` - -**Implementation:** `int(n) - ((int(n) + 1) % 2)` — compact and branch-free. - ---- - -## to_rank - -Converts a 1-D tensor to its rank vector (1-based). - -```python -to_rank(tensor, method="average") -> torch.Tensor -``` - -> **Note:** Requires `torch`. 
Returns `None` when torch is not installed. - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `tensor` | required | 1-D `torch.Tensor` (or array via `@torch_fn`) | -| `method` | `"average"` | Tie-breaking: `"average"` assigns the mean rank to tied values | - -```python -import torch -import scitex as stx - -x = torch.tensor([3.0, 1.0, 2.0, 1.0]) -stx.gen.to_rank(x) -# tensor([4., 1.5, 3., 1.5]) — tied values at positions 1 and 3 get average rank -``` - ---- - -## symlog - -Symmetric log transform: linear near zero, logarithmic for large magnitudes. Preserves sign. - -```python -symlog(x, linthresh=1.0) -> array-like -``` - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `x` | required | Input array or tensor | -| `linthresh` | `1.0` | Width of the linear region around zero | - -Formula: `sign(x) * log1p(|x| / linthresh)` - -```python -import numpy as np -import scitex as stx - -x = np.array([-1000, -1, 0, 1, 1000]) -stx.gen.symlog(x, linthresh=1.0) -# array([-6.908, -0.693, 0., 0.693, 6.908]) -``` - -**Use case:** Plotting neural signals or financial data with large dynamic range and values near zero. - ---- - -## transpose - -Name-based dimension transposition for arrays. Accepts dimension name arrays instead of integer indices. - -```python -transpose(arr_like, src_dims, tgt_dims) -> np.ndarray -``` - -Decorated with `@numpy_fn` — accepts torch.Tensor or list inputs. - -| Parameter | Description | -|-----------|-------------| -| `arr_like` | Input array | -| `src_dims` | Array of dimension names in current order | -| `tgt_dims` | Array of dimension names in desired order (same elements, different order) | - -`src_dims` and `tgt_dims` must contain identical sets of names. 
- -```python -import numpy as np -import scitex as stx - -x = np.random.rand(2, 3, 4) -src = np.array(["batch", "time", "freq"]) -tgt = np.array(["freq", "batch", "time"]) - -y = stx.gen.transpose(x, src, tgt) -print(y.shape) # (4, 2, 3) -``` - ---- - -## connect_nums - -Joins an iterable of values into a hyphen-separated string. - -```python -connect_nums(nums: Iterable) -> str -``` - -```python -import scitex as stx - -stx.gen.connect_nums((0, 0)) # "0-0" -stx.gen.connect_nums((1, 2, 3)) # "1-2-3" -stx.gen.connect_nums(("a", "b")) # "a-b" -``` - -**Use case:** Building unique filename stems from parameter tuples. - -```python -fname = f"result_{stx.gen.connect_nums((subject_id, session_id, run))}.csv" -# "result_3-2-1.csv" -``` - ---- - -## float_linspace - -Generates evenly spaced floats over an interval. Similar to `np.linspace` but guarantees step-based arithmetic (avoids floating-point drift in edge cases). - -```python -float_linspace(start: float, stop: float, num_points: int) -> np.ndarray -``` - -```python -import scitex as stx - -stx.gen.float_linspace(0, 1, 5) -# array([0. , 0.25, 0.5 , 0.75, 1. ]) - -stx.gen.float_linspace(1, 2, 3) -# array([1. , 1.5, 2. ]) -``` - -When `num_points < 2`, returns `[start]` for `num_points == 1` or `[start, stop]` for `num_points == 2`. diff --git a/src/scitex/gen/_skills/tensor-normalization.md b/src/scitex/gen/_skills/tensor-normalization.md deleted file mode 100644 index 8698af5c..00000000 --- a/src/scitex/gen/_skills/tensor-normalization.md +++ /dev/null @@ -1,174 +0,0 @@ ---- -description: Tensor and array normalization functions in stx.gen — z-score, min-max, NaN-safe variants, unbiasing, percentile clipping. All accept both torch.Tensor and numpy.ndarray (via @torch_fn decorator). Optional caching layer for repeated operations. ---- - -# Tensor Normalization - -All functions accept `torch.Tensor` or `numpy.ndarray` thanks to the `@torch_fn` decorator. 
NumPy arrays are converted to tensors, computed, and returned as tensors (or arrays if the input was NumPy). All normalization is **along a single dimension** by default. - -> **Note:** Requires `torch`. Functions are `None` when torch is not installed. - ---- - -## to_z - -Z-score normalization: zero mean, unit variance along a dimension. - -```python -to_z(x, axis=-1, dim=None, device="cuda") -> torch.Tensor -``` - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `x` | required | Input tensor or array | -| `dim` | `None` | Preferred: dimension along which to normalize | -| `axis` | `-1` | Numpy compat alias for `dim` | -| `device` | `"cuda"` | Computation device (auto-fallback to cpu) | - -```python -import scitex as stx -import torch - -x = torch.randn(4, 100) -z = stx.gen.to_z(x, dim=-1) -# z.mean(dim=-1) ≈ 0, z.std(dim=-1) ≈ 1 -``` - ---- - -## to_nanz - -Z-score normalization with NaN handling using `torch.nanmean` and `scitex.torch.nanstd`. - -```python -to_nanz(x, axis=-1, dim=None, device="cuda") -> torch.Tensor -``` - -Same signature as `to_z`. NaN values in `x` are skipped when computing mean and std; they remain NaN in the output. - -```python -x = torch.tensor([1.0, 2.0, float("nan"), 4.0]) -z = stx.gen.to_nanz(x, dim=0) -``` - ---- - -## to_01 - -Min-max normalization to the [0, 1] range. - -```python -to_01(x, axis=-1, dim=None, device="cuda") -> torch.Tensor -``` - -Uses `1e-8` epsilon to avoid division by zero when min == max. - -```python -x = torch.tensor([2.0, 4.0, 6.0, 8.0]) -scaled = stx.gen.to_01(x, dim=0) -# tensor([0.000, 0.333, 0.667, 1.000]) -``` - ---- - -## to_nan01 - -Min-max normalization with NaN handling using `torch.nanmin` / `torch.nanmax`. - -```python -to_nan01(x, axis=-1, dim=None, device="cuda") -> torch.Tensor -``` - -NaN values remain NaN in the output; min/max are computed over non-NaN values. - ---- - -## unbias - -Removes bias (mean or min) from a tensor along a dimension. 
- -```python -unbias(x, axis=-1, dim=None, fn="mean", device="cuda") -> torch.Tensor -``` - -| `fn` value | Operation | -|------------|-----------| -| `"mean"` | Subtracts `x.mean(dim=dim, keepdims=True)` | -| `"min"` | Subtracts `x.min(dim=dim, keepdims=True)[0]` | - -```python -x = torch.tensor([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]) -debiased = stx.gen.unbias(x, dim=-1, fn="mean") -# Each row now has mean 0 -``` - ---- - -## clip_perc - -Clips values to the range [lower percentile, upper percentile] along a dimension. - -```python -clip_perc( - x, - lower_perc=2.5, - upper_perc=97.5, - low=None, - high=None, - axis=-1, - dim=None, - device="cuda", -) -> torch.Tensor -``` - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `lower_perc` | `2.5` | Lower bound percentile (0–100) | -| `upper_perc` | `97.5` | Upper bound percentile (0–100) | -| `low` | `None` | Alternative name for `lower_perc` | -| `high` | `None` | Alternative name for `upper_perc` | - -```python -x = torch.randn(1000) -clipped = stx.gen.clip_perc(x, lower_perc=5, upper_perc=95, dim=0) -``` - ---- - -## Caching Layer (norm_cache) - -`_norm_cache.py` provides optional caching for repeated normalizations of identical data. By default, it **auto-patches** `to_z` and `to_01` in `scitex.gen` when the environment variable `SCITEX_CACHE_NORM=true` (the default). 
- -### Cache control functions - -```python -from scitex.gen._norm_cache import configure_norm_cache, clear_norm_cache, get_norm_cache_info - -# Tune cache -configure_norm_cache(enabled=True, max_size=128, verbose=True) - -# Inspect -info = get_norm_cache_info() -# {'enabled': True, 'max_size': 128, 'current_size': 3, 'operations': ['z-score', ...]} - -# Clear all cached results -clear_norm_cache() -``` - -### Cached variants - -```python -from scitex.gen._norm_cache import to_z_cached, to_01_cached - -# Drop-in replacements with LRU-style weak-reference caching -z = to_z_cached(x, dim=-1) -scaled = to_01_cached(x, dim=0) -``` - -Cache keys are based on array shape, dtype, device, and sampled values. The cache uses `weakref.WeakValueDictionary` so it does not prevent garbage collection. - -### Disable caching - -``` -SCITEX_CACHE_NORM=false python my_script.py -``` diff --git a/src/scitex/gen/_skills/timestamper.md b/src/scitex/gen/_skills/timestamper.md deleted file mode 100644 index 7dc6705f..00000000 --- a/src/scitex/gen/_skills/timestamper.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -description: TimeStamper class in stx.gen — records elapsed time with comments, returns formatted strings, and stores a pandas DataFrame of all checkpoint events. Useful for profiling multi-stage pipelines. ---- - -# TimeStamper - -A callable class that measures elapsed time from object creation and between successive calls. Each call records a labeled checkpoint and returns a formatted string. All records are accessible as a `pandas.DataFrame`. - -```python -from scitex.gen import TimeStamper -``` - ---- - -## Constructor - -```python -TimeStamper(is_simple: bool = True) -> TimeStamper -``` - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `is_simple` | `True` | `True`: compact format `"ID:0 | 00:00:01 label |"`. `False`: verbose format with both total and delta times. 
| - ---- - -## Calling the stamper - -```python -ts(comment: str = "", verbose: bool = False) -> str -``` - -Returns a formatted timestamp string and records the checkpoint internally. - -| Parameter | Default | Description | -|-----------|---------|-------------| -| `comment` | `""` | Label for this checkpoint | -| `verbose` | `False` | If `True`, also prints the string to stdout | - -**Simple format** (`is_simple=True`): -``` -"ID:0 | 00:00:01 Starting process | " -``` - -**Verbose format** (`is_simple=False`): -``` -"Time (id:0): total 00:00:01, prev 00:00:01 [hh:mm:ss]: Starting process\n" -``` - ---- - -## record property - -```python -ts.record -> pd.DataFrame -``` - -Returns a DataFrame with columns: `timestamp`, `elapsed_since_start`, `elapsed_since_prev`, `comment`. The `formatted_text` column is excluded. - ---- - -## delta method - -```python -ts.delta(id1: int, id2: int) -> float -``` - -Returns the difference in seconds between two checkpoint timestamps. Supports negative indices (Python-style). - -| Parameter | Description | -|-----------|-------------| -| `id1` | First checkpoint ID | -| `id2` | Second checkpoint ID | - -Raises `ValueError` if either ID does not exist. - ---- - -## Full example - -```python -import time -import scitex as stx - -ts = stx.gen.TimeStamper(is_simple=True) - -ts("Loading data", verbose=True) -# ID:0 | 00:00:00 Loading data | - -time.sleep(1) -ts("Preprocessing", verbose=True) -# ID:1 | 00:00:01 Preprocessing | - -time.sleep(2) -ts("Training", verbose=True) -# ID:2 | 00:00:03 Training | - -# DataFrame of all checkpoints -print(ts.record) -# timestamp elapsed_since_start elapsed_since_prev comment -# 0 ... 0.000 0.000 Loading data -# 1 ... 1.002 1.002 Preprocessing -# 2 ... 
3.004 2.002 Training - -# Time between step 1 and step 0 (positive = id1 is later) -diff = ts.delta(1, 0) -# ≈ 1.002 - -# Negative index: last minus first -diff = ts.delta(-1, 0) -# ≈ 3.004 -``` - ---- - -## Profiling pipelines - -```python -ts = stx.gen.TimeStamper(is_simple=False) - -for i, batch in enumerate(dataloader): - ts(f"batch {i} loaded") - result = model(batch) - ts(f"batch {i} forward") - -# All timings in one DataFrame -print(ts.record[["elapsed_since_prev", "comment"]]) -``` diff --git a/src/scitex/gen/_skills/xml-matlab.md b/src/scitex/gen/_skills/xml-matlab.md deleted file mode 100644 index 4fdb9725..00000000 --- a/src/scitex/gen/_skills/xml-matlab.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -description: XML parsing and MATLAB .mat file loading utilities in stx.gen — xml2dict converts XML files to nested dicts, while mat2dict/mat2npa/dir2npy load .mat files into Python/NumPy structures. ---- - -# XML and MATLAB Utilities - ---- - -## XML Parsing - -### xml2dict - -Parses an XML file into a nested Python dict. - -```python -xml2dict(lpath_xml: str) -> XmlDictConfig -``` - -Returns an `XmlDictConfig` instance (a subclass of `dict`). - -```python -import scitex as stx - -cfg = stx.gen.xml2dict("/path/to/config.xml") -print(cfg["root"]["section"]["key"]) -``` - -### XmlDictConfig - -A `dict` subclass that recursively converts an `xml.etree.ElementTree` element and its children into a Python dict. Nested elements become nested dicts; repeated tags at the same level become an `XmlListConfig`. 
- -```python -from xml.etree import cElementTree as ElementTree -from scitex.gen import XmlDictConfig - -tree = ElementTree.parse("your_file.xml") -root = tree.getroot() -xmldict = XmlDictConfig(root) -# Use like a plain dict -value = xmldict["section"]["key"] -``` - -**Rules:** -- Element with a single child or children with **different** tags → `XmlDictConfig` -- Element with children that all share the **same** tag → `XmlListConfig` -- Element with text content (no children, no attributes) → stored as a string value -- Element attributes are merged into the dict - -### XmlListConfig - -A `list` subclass that converts a sequence of same-tagged XML elements into a Python list. Elements that have children are wrapped in `XmlDictConfig`; plain text elements are appended as strings. - -```python -from scitex.gen import XmlListConfig -``` - ---- - -## MATLAB .mat File Utilities - -Legacy helpers for loading MATLAB `.mat` files. Tries HDF5 format first (`h5py`), falls back to `scipy.io.loadmat`. - -### mat2dict - -```python -mat2dict(fname: str) -> dict -``` - -Returns a dict mapping variable names to their values. Adds a `"__hdf__"` key (`True`/`False`) indicating which backend was used. - -```python -import scitex as stx - -d = stx.gen.mat2dict("/data/recording.mat") -signal = d["eeg"] # h5py Dataset or numpy array depending on format -is_hdf = d["__hdf__"] -``` - -### public_keys - -```python -public_keys(d: dict) -> list -``` - -Returns keys from a mat2dict result that do not start with `_` (i.e., user variables, not MATLAB metadata). - -```python -keys = stx.gen.public_keys(d) -``` - -### save_npa - -```python -save_npa(fname: str, x: np.ndarray) -> None -``` - -Saves a numpy array to `fname` using `np.save`. - -### mat2npy - -```python -mat2npy(fname: str, typ: type) -> None -``` - -Loads the first variable from a `.mat` file and saves it as a `.npy` file alongside the original (`.mat` extension replaced). 
- -> **Warning:** Contains `pdb.set_trace()` calls in `mat2npa` and `keys2npa` — these are legacy debugging artifacts. For production use, call `mat2dict` directly. - -### dir2npy - -```python -dir2npy(dir: str, typ: type, regex: str = "*") -> None -``` - -Converts all `regex + ".mat"` files in `dir` to `.npy` files. Changes the working directory to `dir` during execution. - -```python -stx.gen.dir2npy("/data/eeg/", typ=np.float32, regex="*xdata") -``` - ---- - -## Recommended alternatives - -For new code, prefer: -- XML → `stx.io.load("file.xml")` (if supported) or `xml2dict` directly -- MATLAB → `stx.io.load("file.mat")` which wraps both `h5py` and `scipy.io.loadmat` diff --git a/src/scitex/gen/_src.py b/src/scitex/gen/_src.py deleted file mode 100755 index 212862a7..00000000 --- a/src/scitex/gen/_src.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-06-13 22:44:28 (ywatanabe)" -# File: /ssh:sp:/home/ywatanabe/proj/SciTeX-Code/src/scitex/gen/_src.py -# ---------------------------------------- -import os - -__FILE__ = __file__ -__DIR__ = os.path.dirname(__FILE__) -# ---------------------------------------- - -#!./env/bin/python3 - -import inspect -import subprocess - - -def src(obj): - """ - Returns the source code of a given object using `less`. - Handles functions, classes, class instances, methods, and built-in functions. - """ - # If obj is an instance of a class, get the class of the instance. 
- if ( - not inspect.isclass(obj) - and not inspect.isfunction(obj) - and not inspect.ismethod(obj) - ): - obj = obj.__class__ - - try: - # Attempt to retrieve the source code - source_code = inspect.getsource(obj) - - # Assuming scitex.gen.less is a placeholder for displaying text with `less` - # This part of the code is commented out as it seems to be a placeholder - # scitex.gen.less(source_code) - - # Open a subprocess to use `less` for displaying the source code - process = subprocess.Popen(["less"], stdin=subprocess.PIPE, encoding="utf8") - process.communicate(input=source_code) - if process.returncode != 0: - print(f"Process exited with return code {process.returncode}") - except OSError as e: - # Handle cases where the source code cannot be retrieved (e.g., built-in functions) - print(f"Cannot retrieve source code: {e}") - except TypeError as e: - # Handle cases where the object type is not supported - print(f"TypeError: {e}") - except Exception as e: - # Handle any other unexpected errors - print(f"Error: {e}") - - -# def src(obj): -# """ -# Returns the source code of a given object using `less`. -# Handles functions, classes, class instances, and methods. -# """ -# # If obj is an instance of a class, get the class of the instance. 
-# if ( -# not inspect.isclass(obj) -# and not inspect.isfunction(obj) -# and not inspect.ismethod(obj) -# ): -# obj = obj.__class__ - -# try: -# # Attempt to retrieve the source code -# source_code = inspect.getsource(obj) -# scitex.gen.less(source_code) - -# # # Open a subprocess to use `less` for displaying the source code -# # process = subprocess.Popen( -# # ["less"], stdin=subprocess.PIPE, encoding="utf8" -# # ) -# # process.communicate(input=source_code) -# if process.returncode != 0: -# print(f"Process exited with return code {process.returncode}") -# except TypeError as e: -# # Handle cases where the object type is not supported -# print(f"TypeError: {e}") -# except Exception as e: -# # Handle any other unexpected errors -# print(f"Error: {e}") - -# (YOUR AWESOME CODE) - -if __name__ == "__main__": - import sys - - import matplotlib.pyplot as plt - - # Start - CONFIG, sys.stdout, sys.stderr, plt, CC = scitex.session.start( - sys, plt, verbose=False - ) - import sys - - # (YOUR AWESOME CODE) - # Close - scitex.session.close(CONFIG, verbose=False, notify=False) - -""" -/ssh:ywatanabe@444:/home/ywatanabe/proj/entrance/scitex/gen/_def.py -""" - -# EOF diff --git a/src/scitex/gen/_symlink.py b/src/scitex/gen/_symlink.py deleted file mode 100755 index c6a10f7c..00000000 --- a/src/scitex/gen/_symlink.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -# Time-stamp: "2024-11-02 13:29:31 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_symlink.py - -import os - -from scitex.str import color_text - - -def symlink(tgt, src, force=False): - """Create a symbolic link. - - This function creates a symbolic link from the target to the source. - If the force parameter is True, it will remove any existing file at - the source path before creating the symlink. - - Parameters - ---------- - tgt : str - The target path (the file or directory to be linked to). - src : str - The source path (where the symbolic link will be created). 
- force : bool, optional - If True, remove the existing file at the src path before creating - the symlink (default is False). - - Returns - ------- - None - - Raises - ------ - OSError - If the symlink creation fails. - - Example - ------- - >>> symlink('/path/to/target', '/path/to/link') - >>> symlink('/path/to/target', '/path/to/existing_file', force=True) - """ - if force: - try: - os.remove(src) - except FileNotFoundError: - pass - - # Calculate the relative path from src to tgt - src_dir = os.path.dirname(src) - relative_tgt = os.path.relpath(tgt, src_dir) - - os.symlink(relative_tgt, src) - print(color_text(f"\nSymlink was created: {src} -> {relative_tgt}\n", c="yellow")) - - -# EOF diff --git a/src/scitex/gen/_symlog.py b/src/scitex/gen/_symlog.py deleted file mode 100755 index 6c4518f1..00000000 --- a/src/scitex/gen/_symlog.py +++ /dev/null @@ -1,27 +0,0 @@ -#!./env/bin/python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-07-06 07:16:38 (ywatanabe)" -# ./src/scitex/gen/_symlog.py - -import numpy as np - - -def symlog(x, linthresh=1.0): - """ - Apply a symmetric log transformation to the input data. - - Parameters - ---------- - x : array-like - Input data to be transformed. - linthresh : float, optional - Range within which the transformation is linear. Defaults to 1.0. - - Returns - ------- - array-like - Symmetrically transformed data. - """ - sign_x = np.sign(x) - abs_x = np.abs(x) - return sign_x * (np.log1p(abs_x / linthresh)) diff --git a/src/scitex/gen/_title2path.py b/src/scitex/gen/_title2path.py deleted file mode 100755 index e016dbe7..00000000 --- a/src/scitex/gen/_title2path.py +++ /dev/null @@ -1,60 +0,0 @@ -#!./env/bin/python3 -# -*- coding: utf-8 -*- -# Time-stamp: 2024-05-12 21:02:21 (7) -# /sshx:ywatanabe@444:/home/ywatanabe/proj/scitex/src/scitex/gen/_title2spath.py - - -def title2path(title): - """ - Convert a title (string or dictionary) to a path-friendly string. 
- - Parameters - ---------- - title : str or dict - The input title to be converted. - - Returns - ------- - str - A path-friendly string derived from the input title. - """ - if isinstance(title, dict): - from scitex.dict import to_str - - title = to_str(title) - - path = title - - patterns = [":", ";", "=", "[", "]"] - for pattern in patterns: - path = path.replace(pattern, "") - - path = path.replace("_-_", "-") - path = path.replace(" ", "_") - - while "__" in path: - path = path.replace("__", "_") - - return path.lower() - - -# def title2path(title): -# if isinstance(title, dict): -# title = dict2str(title) - -# path = title - -# # Comma patterns -# patterns = [":", ";", "=", "[", "]"] -# for pp in patterns: -# path = path.replace(pp, "") - -# # Exceptions -# path = path.replace("_-_", "-") -# path = path.replace(" ", "_") - -# # Consective under scores -# for _ in range(10): -# path = path.replace("__", "_") - -# return path.lower() diff --git a/src/scitex/gen/_to_even.py b/src/scitex/gen/_to_even.py deleted file mode 100755 index 177b4b4e..00000000 --- a/src/scitex/gen/_to_even.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-25 23:40:12 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_to_even.py - -THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/gen/_to_even.py" - - -def to_even(n): - """Convert a number to the nearest even number less than or equal to itself. - - Parameters - ---------- - n : int or float - The input number to be converted. - - Returns - ------- - int - The nearest even number less than or equal to the input. 
- - Example - ------- - >>> to_even(5) - 4 - >>> to_even(6) - 6 - >>> to_even(3.7) - 2 - >>> to_even(-2.3) - -4 - >>> to_even(-0.1) - -2 - """ - import math - - # Handle integers directly to avoid float conversion issues with large numbers - # Note: bool is a subclass of int, so we need to exclude it - if isinstance(n, int) and not isinstance(n, bool): - if n % 2 == 0: - return int(n) # Ensure we return int, not bool - else: - return int(n - 1) # Ensure we return int, not bool - - # Handle special float values - if isinstance(n, float): - if math.isnan(n): - raise ValueError("Cannot convert NaN to even") - if math.isinf(n): - raise OverflowError("Cannot convert infinity to even") - # Python can actually convert sys.float_info.max to int, so we don't need this check - # Only infinity truly can't be converted - - # Try to handle custom objects with __int__ (but not float types) - if hasattr(n, "__int__") and not isinstance(n, (float, bool)): - try: - n_int = int(n) - if n_int % 2 == 0: - return int(n_int) - else: - return int(n_int - 1) - except: - pass - - # Check for string type explicitly - raise TypeError - if isinstance(n, str): - raise TypeError(f"must be real number, not {type(n).__name__}") - - # Convert to float for all other cases - try: - n_float = float(n) - except (TypeError, ValueError): - raise TypeError(f"must be real number, not {type(n).__name__}") - - # Use floor for float values - floored = int(math.floor(n_float)) - - # If odd, subtract 1 to get the next lower even number - if floored % 2 != 0: - return int(floored - 1) # Ensure we return int, not bool - return int(floored) # Ensure we return int, not bool - - -# EOF diff --git a/src/scitex/gen/_to_odd.py b/src/scitex/gen/_to_odd.py deleted file mode 100755 index a83cc340..00000000 --- a/src/scitex/gen/_to_odd.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-25 23:40:22 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_to_odd.py - 
-THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/gen/_to_odd.py" - - -def to_odd(n): - """Convert a number to the nearest odd number less than or equal to itself. - - Parameters - ---------- - n : int or float - The input number to be converted. - - Returns - ------- - int - The nearest odd number less than or equal to the input. - - Example - ------- - >>> to_odd(6) - 5 - >>> to_odd(7) - 7 - >>> to_odd(5.8) - 5 - """ - return int(n) - ((int(n) + 1) % 2) - - -# EOF diff --git a/src/scitex/gen/_to_rank.py b/src/scitex/gen/_to_rank.py deleted file mode 100755 index c69c2d5e..00000000 --- a/src/scitex/gen/_to_rank.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-02 13:05:47 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_to_rank.py -#!./env/bin/python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-08-29 22:10:06 (ywatanabe)" -# ./src/scitex/gen/data_processing/_to_rank.py - -import torch - -# from .._converters import -from scitex.decorators import torch_fn - - -@torch_fn -def to_rank(tensor, method="average"): - sorted_tensor, indices = torch.sort(tensor) - ranks = torch.empty_like(tensor) - ranks[indices] = ( - torch.arange(len(tensor), dtype=tensor.dtype, device=tensor.device) + 1 - ) - - if method == "average": - ranks = ranks.float() - ties = torch.nonzero(sorted_tensor[1:] == sorted_tensor[:-1]) - for i in range(len(ties)): - start = ties[i] - end = start + 1 - while ( - end < len(sorted_tensor) and sorted_tensor[end] == sorted_tensor[start] - ): - end += 1 - ranks[indices[start:end]] = ranks[indices[start:end]].mean() - - return ranks - - -# EOF diff --git a/src/scitex/gen/_transpose.py b/src/scitex/gen/_transpose.py deleted file mode 100755 index 4a762b3e..00000000 --- a/src/scitex/gen/_transpose.py +++ /dev/null @@ -1,38 +0,0 @@ -#!./env/bin/python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-08-24 09:47:16 (ywatanabe)" -# ./src/scitex/gen/_transpose.py - -import numpy as np - -from 
scitex.decorators import numpy_fn - - -@numpy_fn -def transpose(arr_like, src_dims, tgt_dims): - """ - Transpose an array-like object based on source and target dimensions. - - Parameters - ---------- - arr_like : np.array - The input array to be transposed. - src_dims : np.array - List of dimension names in the source order. - tgt_dims : np.array - List of dimension names in the target order. - - Returns - ------- - np.array - The transposed array. - - Raises - ------ - AssertionError - If source and target dimensions don't contain the same elements. - """ - assert set(src_dims) == set( - tgt_dims - ), "Source and target dimensions must contain the same elements" - return arr_like.transpose(*[np.where(src_dims == dim)[0][0] for dim in tgt_dims]) diff --git a/src/scitex/gen/_type.py b/src/scitex/gen/_type.py deleted file mode 100755 index 8c667412..00000000 --- a/src/scitex/gen/_type.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-17 12:45:50 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_type.py - -THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/gen/_type.py" - -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-03 10:33:13 (ywatanabe)" -# File: placeholder.py - -from typing import Any, Union - -import numpy as np -import pandas as pd -import torch -import xarray as xr - -ArrayLike = Union[ - list, tuple, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray, torch.Tensor -] - - -def var_info(variable: Any) -> dict: - """Returns type and structural information about a variable. - - Example - ------- - >>> data = np.array([[1, 2], [3, 4]]) - >>> info = var_info(data) - >>> print(info) - { - 'type': 'numpy.ndarray', - 'length': 2, - 'shape': (2, 2), - 'dimensions': 2 - } - - Parameters - ---------- - variable : Any - Variable to inspect. - - Returns - ------- - dict - Dictionary containing variable information. 
- """ - info = {"type": type(variable).__name__} - - # Length check - if hasattr(variable, "__len__"): - info["length"] = len(variable) - - # Shape check for array-like objects - if isinstance( - variable, (np.ndarray, pd.DataFrame, pd.Series, xr.DataArray, torch.Tensor) - ): - info["shape"] = variable.shape - info["dimensions"] = len(variable.shape) - - # Special handling for nested lists - elif isinstance(variable, list): - if variable and isinstance(variable[0], list): - depth = 1 - current = variable - shape = [len(variable)] - while current and isinstance(current[0], list): - shape.append(len(current[0])) - current = current[0] - depth += 1 - info["shape"] = tuple(shape) - info["dimensions"] = depth - - return info - - -# EOF diff --git a/src/scitex/gen/_var_info.py b/src/scitex/gen/_var_info.py deleted file mode 100755 index 0771c296..00000000 --- a/src/scitex/gen/_var_info.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-25 00:35:31 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_var_info.py - -THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/gen/_var_info.py" - -from typing import Any, Union - -import numpy as np -import pandas as pd -import torch -import xarray as xr - -ArrayLike = Union[ - list, tuple, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray, torch.Tensor -] - - -def var_info(variable: Any) -> dict: - """Returns type and structural information about a variable. - - Example - ------- - >>> data = np.array([[1, 2], [3, 4]]) - >>> info = var_info(data) - >>> print(info) - { - 'type': 'numpy.ndarray', - 'length': 2, - 'shape': (2, 2), - 'dimensions': 2 - } - - Parameters - ---------- - variable : Any - Variable to inspect. - - Returns - ------- - dict - Dictionary containing variable information. 
- """ - info = {"type": type(variable).__name__} - - # Length check - if hasattr(variable, "__len__"): - info["length"] = len(variable) - - # Shape check for array-like objects - if isinstance( - variable, (np.ndarray, pd.DataFrame, pd.Series, xr.DataArray, torch.Tensor) - ): - info["shape"] = variable.shape - info["dimensions"] = len(variable.shape) - - # Special handling for nested lists - elif isinstance(variable, list): - if variable and isinstance(variable[0], list): - depth = 1 - current = variable - shape = [len(variable)] - while current and isinstance(current[0], list): - shape.append(len(current[0])) - current = current[0] - depth += 1 - info["shape"] = tuple(shape) - info["dimensions"] = depth - - return info - - -# EOF diff --git a/src/scitex/gen/_wrap.py b/src/scitex/gen/_wrap.py deleted file mode 100755 index a4980b59..00000000 --- a/src/scitex/gen/_wrap.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-04 02:13:22 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/_wrap.py - -import functools - - -def wrap(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - return func(*args, **kwargs) - - return wrapper - - -# EOF diff --git a/src/scitex/gen/_xml2dict.py b/src/scitex/gen/_xml2dict.py deleted file mode 100755 index 07de1d14..00000000 --- a/src/scitex/gen/_xml2dict.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -# Time-stamp: "2021-09-07 13:06:33 (ylab)" - -from xml.etree import cElementTree as ElementTree - - -def xml2dict(lpath_xml): - # tree = ElementTree.parse('your_file.xml') - tree = ElementTree.parse(lpath_xml) - root = tree.getroot() - xmldict = XmlDictConfig(root) - return xmldict - - -class XmlListConfig(list): - def __init__(self, aList): - for element in aList: - if element: - # treat like dict - if len(element) == 1 or element[0].tag != element[1].tag: - self.append(XmlDictConfig(element)) - # treat like list - elif element[0].tag == element[1].tag: - 
self.append(XmlListConfig(element)) - elif element.text: - text = element.text.strip() - if text: - self.append(text) - - -class XmlDictConfig(dict): - """ - Example usage: - - >>> tree = ElementTree.parse('your_file.xml') - >>> root = tree.getroot() - >>> xmldict = XmlDictConfig(root) - - Or, if you want to use an XML string: - - >>> root = ElementTree.XML(xml_string) - >>> xmldict = XmlDictConfig(root) - - And then use xmldict for what it is... a dict. - """ - - def __init__(self, parent_element): - if parent_element.items(): - self.update(dict(parent_element.items())) - for element in parent_element: - if element: - # treat like dict - we assume that if the first two tags - # in a series are different, then they are all different. - if len(element) == 1 or element[0].tag != element[1].tag: - aDict = XmlDictConfig(element) - # treat like list - we assume that if the first two tags - # in a series are the same, then the rest are the same. - else: - # here, we put the list in dictionary; the key is the - # tag name the list elements all share in common, and - # the value is the list itself - aDict = {element[0].tag: XmlListConfig(element)} - # if the tag has attributes, add those to the dict - if element.items(): - aDict.update(dict(element.items())) - self.update({element.tag: aDict}) - # this assumes that if you've got an attribute in a tag, - # you won't be having any text. This may or may not be a - # good idea -- time will tell. It works for the way we are - # currently doing XML configuration files... 
- elif element.items(): - self.update({element.tag: dict(element.items())}) - # finally, if there are no child tags and no attributes, extract - # the text - else: - self.update({element.tag: element.text}) diff --git a/src/scitex/gen/misc.py b/src/scitex/gen/misc.py deleted file mode 100755 index bb91891a..00000000 --- a/src/scitex/gen/misc.py +++ /dev/null @@ -1,754 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-16 16:26:59 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/misc.py - -THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/gen/misc.py" - -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Time-stamp: "2024-11-02 12:50:29 (ywatanabe)" -# File: ./scitex_repo/src/scitex/gen/misc.py - -import math -import os -import shutil -import threading -import time -import warnings -from bisect import bisect_left -from functools import wraps - -import numpy as np -import pandas as pd -import readchar -import torch - - -def find_closest(list_obj, num_insert): - """Find the closest value in a sorted list to a given number. - - Parameters - ---------- - list_obj : list - A sorted list of numbers. - num_insert : float or int - The number to find the closest value to. - - Returns - ------- - tuple - A tuple containing (closest_value, index_of_closest_value). - - Example - ------- - >>> find_closest([1, 3, 5, 7, 9], 6) - (5, 2) - >>> find_closest([1, 3, 5, 7, 9], 8) - (7, 3) - """ - """ - Assumes list_obj is sorted. Returns the closest value to num. - If the same number is included in list_obj, the smaller number is returned. 
- - Example: - list_obj = np.array([0, 1, 1, 2, 3, 3]) - num = 1.2 - closest_num, closest_pos = take_the_closest(list_obj, num) - print(closest_num, closest_pos) - # 1 2 - - list_obj = np.array([0, 1, 1, 2, 3, 3]) - num = 1 - closest_num, closest_pos = take_the_closest(list_obj, num) - print(closest_num, closest_pos) - # 1 1 - """ - if math.isnan(num_insert): - closest_num = np.nan - closest_pos = np.nan - - pos_num_insert = bisect_left(list_obj, num_insert) - - if pos_num_insert == 0: - closest_num = list_obj[0] - closest_pos = pos_num_insert - - if pos_num_insert == len(list_obj): - closest_num = list_obj[-1] - closest_pos = pos_num_insert - - else: - pos_before = pos_num_insert - 1 - - before_num = list_obj[pos_before] - after_num = list_obj[pos_num_insert] - - delta_after = abs(after_num - num_insert) - delta_before = abs(before_num - num_insert) - - if np.abs(delta_after) < np.abs(delta_before): - closest_num = after_num - closest_pos = pos_num_insert - - else: - closest_num = before_num - closest_pos = pos_before - - return closest_num, closest_pos - - -################################################################################ -## mutable -################################################################################ -def isclose(mutable_a, mutable_b): - """Check if two mutable objects are close to each other. - - This function compares two mutable objects (e.g., lists, numpy arrays) element-wise - to determine if they are close to each other. - - Parameters - ---------- - mutable_a : list or numpy.ndarray - The first mutable object to compare. - mutable_b : list or numpy.ndarray - The second mutable object to compare. - - Returns - ------- - bool - True if the objects are close to each other, False otherwise. 
- - Example - ------- - >>> isclose([1.0, 2.0, 3.0], [1.0, 2.0001, 3.0]) - True - >>> isclose([1.0, 2.0, 3.0], [1.0, 2.1, 3.0]) - False - """ - return [math.isclose(a, b) for a, b in zip(mutable_a, mutable_b)] - - -################################################################################ -## dictionary -################################################################################ - - -################################################################################ -## variables -################################################################################ -def is_defined_global(x_str): - """ - Example: - print(is_defined('a')) - # False - - a = 5 - print(is_defined('a')) - # True - """ - return x_str in globals() - - -def is_defined_local(x_str): - """ - Example: - print(is_defined('a')) - # False - - a = 5 - print(is_defined('a')) - # True - """ - return x_str in locals() - - -################################################################################ -## versioning -################################################################################ -def is_later_or_equal(package, tgt_version, format="MAJOR.MINOR.PATCH"): - """Check if the installed version of a package is later than or equal to a target version. - - Parameters - ---------- - package : str - The name of the package to check. - tgt_version : str - The target version to compare against. - format : str, optional - The version format (default is "MAJOR.MINOR.PATCH"). - - Returns - ------- - bool - True if the installed version is later than or equal to the target version, False otherwise. 
- - Example - ------- - >>> is_later_or_equal('numpy', '1.18.0') - True - >>> is_later_or_equal('pandas', '2.0.0') - False - """ - - import numpy as np - - indi, matched = scitex.gen.search(["MAJOR", "MINOR", "PATCH"], format.split(".")) - imp_major, imp_minor, imp_patch = [ - int(v) for v in np.array(package.__version__.split("."))[indi] - ] - tgt_major, tgt_minor, tgt_patch = [ - int(v) for v in np.array(tgt_version.split("."))[indi] - ] - - print( - f"\npackage: {package.__name__}\n" - f"target_version: {tgt_version}\n" - f"imported_version: {imp_major}.{imp_minor}.{imp_patch}\n" - ) - - ## Mjorr - if imp_major > tgt_major: - return True - - if imp_major < tgt_major: - return False - - if imp_major == tgt_major: - ## Minor - if imp_minor > tgt_minor: - return True - - if imp_minor < tgt_minor: - return False - - if imp_minor == tgt_minor: - ## Patch - if imp_patch > tgt_patch: - return True - if imp_patch < tgt_patch: - return False - if imp_patch == tgt_patch: - return True - - -################################################################################ -## File -################################################################################ -def _copy_a_file(src, dst, allow_overwrite=False): - """Copy a single file from source to destination. - - Parameters - ---------- - src : str - The path to the source file. - dst : str - The path to the destination file. - allow_overwrite : bool, optional - If True, allows overwriting existing files (default is False). - - Raises - ------ - FileExistsError - If the destination file already exists and allow_overwrite is False. 
- - Example - ------- - >>> _copy_a_file('/path/to/source.txt', '/path/to/destination.txt') - >>> _copy_a_file('/path/to/source.txt', '/path/to/existing.txt', allow_overwrite=True) - """ - if src == "/dev/null": - print(f"\n/dev/null was not copied.\n") - - else: - if dst.endswith("/"): - _, src_fname, src_ext = scitex.path.split(src) - # src_fname = src + src_ext - dst = dst + src_fname + src_ext - - if not os.path.exists(dst): - shutil.copyfile(src, dst) - print(f'\nCopied "{src}" to "{dst}".\n') - - else: - if allow_overwrite: - shutil.copyfile(src, dst) - print(f'\nCopied "{src}" to "{dst}" (overwritten).\n') - - if not allow_overwrite: - print(f'\n"{dst}" exists and copying from "{src}" was aborted.\n') - - -def copy_files(src_files, dists, allow_overwrite=False): - """Copy multiple files from source(s) to destination(s). - - Parameters - ---------- - src_files : str or list of str - The path(s) to the source file(s). - dists : str or list of str - The path(s) to the destination file(s) or directory(ies). - allow_overwrite : bool, optional - If True, allows overwriting existing files (default is False). - - Example - ------- - >>> copy_files('/path/to/source.txt', '/path/to/destination/') - >>> copy_files(['/path/to/file1.txt', '/path/to/file2.txt'], ['/path/to/dest1/', '/path/to/dest2/']) - >>> copy_files('/path/to/source.txt', '/path/to/existing.txt', allow_overwrite=True) - """ - if isinstance(src_files, str): - src_files = [src_files] - - if isinstance(dists, str): - dists = [dists] - - for sf in src_files: - for dst in dists: - _copy_a_file(sf, dst, allow_overwrite=allow_overwrite) - - -def copy_the_file(sdir): - """Copy the current script file to a specified directory. - - This function copies the script file that called it to a specified directory. - It uses the calling script's filename and copies it to the given directory. - - Parameters - ---------- - sdir : str - The destination directory where the file should be copied. 
- - Note - ---- - This function will not copy the file if it's run in an IPython environment. - - Example - ------- - >>> copy_the_file('/path/to/destination/') - """ - THIS_FILE = inspect.stack()[1].filename - _, fname, ext = scitex.path.split(__file__) - - # dst = sdir + fname + ext - - if "ipython" not in __file__: - _copy_a_file(__file__, dst) - - -def is_nan(X): - """Check if the input contains any NaN values and raise an error if found. - - This function checks for NaN values in various data types including pandas DataFrames, - numpy arrays, PyTorch tensors, and scalar values. - - Parameters - ---------- - X : pandas.DataFrame, numpy.ndarray, torch.Tensor, float, or int - The input data to check for NaN values. - - Raises - ------ - ValueError - If any NaN value is found in the input. - - Example - ------- - >>> import numpy as np - >>> import pandas as pd - >>> import torch - >>> is_nan(pd.DataFrame({'a': [1, 2, np.nan]})) - ValueError: NaN was found in X - >>> is_nan(np.array([1, 2, 3])) - # No error raised - >>> is_nan(torch.tensor([1.0, float('nan'), 3.0])) - ValueError: NaN was found in X - >>> is_nan(float('nan')) - ValueError: X was NaN - """ - if isinstance(X, pd.DataFrame): - if X.isna().any().any(): - raise ValueError("NaN was found in X") - elif isinstance(X, np.ndarray): - if np.isnan(X).any(): - raise ValueError("NaN was found in X") - elif torch.is_tensor(X): - if X.isnan().any(): - raise ValueError("NaN was found in X") - elif isinstance(X, (float, int)): - if math.isnan(X): - raise ValueError("X was NaN") - - -def partial_at(func, index, value): - """Create a partial function with a fixed argument at a specific position. - - This function creates a new function that calls the original function with a - fixed argument inserted at the specified index position. - - Parameters - ---------- - func : callable - The original function to be partially applied. - index : int - The position at which to insert the fixed argument. 
# NOTE(review): this chunk was recovered from a line-mangled diff. The head of
# `partial_at` (its `def` line and docstring opening) lies outside the visible
# region; the signature below is reconstructed from the function body and the
# docstring's own example -- confirm against the original file.
def partial_at(func, index, value):
    """Fix one positional argument of ``func`` at a given position.

    Parameters
    ----------
    func : callable
        The function to wrap.
    index : int
        Position at which the fixed argument is inserted.
    value : any
        The fixed argument value to be inserted.

    Returns
    -------
    callable
        A new function that calls the original function with the fixed argument.

    Example
    -------
    >>> def greet(greeting, name):
    ...     return f"{greeting}, {name}!"
    >>> hello = partial_at(greet, 0, "Hello")
    >>> hello("Alice")
    'Hello, Alice!'
    >>> hello("Bob")
    'Hello, Bob!'
    """

    @wraps(func)
    def result(*rest, **kwargs):
        # Splice the fixed value into the caller-supplied positionals at `index`.
        args = []
        args.extend(rest[:index])
        args.append(value)
        args.extend(rest[index:])
        return func(*args, **kwargs)

    return result


def connect_nums(nums):
    """Connect multiple numbers/values with hyphens.

    This function takes an iterable of numbers or values and joins them
    with hyphens to create a single string representation.

    Parameters
    ----------
    nums : iterable
        An iterable of numbers or values to be connected.

    Returns
    -------
    str
        A string with the values joined by hyphens.

    Example
    -------
    >>> connect_nums((0, 0))
    '0-0'
    >>> connect_nums((1, 2, 3))
    '1-2-3'
    >>> connect_nums(('a', 'b'))
    'a-b'
    """
    return "-".join(str(num) for num in nums)


def describe(df, method="mean_std", round_factor=3, axis=0):
    """Compute descriptive statistics for a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame or array-like
        Input data.
    method : str, optional
        Statistical method to use. Options are 'mean_std', 'mean_ci',
        'median_iqr'. Default is 'mean_std'.
    round_factor : int, optional
        Number of decimal places used when rounding the returned
        statistics. Default is 3.
    axis : int, optional
        Axis along which to compute statistics. Default is 0.

    Returns
    -------
    dict
        ``{'n', 'mean', 'std'}`` / ``{'n', 'mean', 'ci'}`` /
        ``{'n', 'median', 'iqr'}`` depending on ``method``.

    Example
    -------
    >>> data = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [10, 20, 30, 40, 50]})
    >>> result = describe(data, method='mean_std')
    >>> print(f"n={result['n']}, mean={result['mean']}, std={result['std']}")
    """
    assert method in ["mean_std", "mean_ci", "median_iqr"]
    df = pd.DataFrame(df)
    # Per-slice count of non-missing values (used as n and in the CI width).
    nn = df.notna().sum(axis=axis)

    with warnings.catch_warnings():
        # nanmean/nanstd emit RuntimeWarning on all-NaN slices; the resulting
        # NaN is the intended output, so silence the warning.
        warnings.simplefilter("ignore", RuntimeWarning)
        if method in ["mean_std", "mean_ci"]:
            mm = np.nanmean(df, axis=axis)
            if method == "mean_std":
                ss = np.nanstd(df, axis=axis)
                key = "std"
            else:  # mean_ci: 95% confidence-interval half-width
                ss = 1.96 * np.nanstd(df, axis=axis) / np.sqrt(nn)
                key = "ci"
            # BUG FIX: this branch previously hard-coded 3 decimals while the
            # median branch honoured `round_factor`; both now use
            # `round_factor` (unchanged behaviour at the default of 3). The
            # old docstring claim that round_factor "divides the spread" was
            # wrong -- it has always been a rounding precision here.
            return {
                "n": np.round(nn, round_factor),
                "mean": np.round(mm, round_factor),
                key: np.round(ss, round_factor),
            }
        # median_iqr
        med = df.median(axis=axis)
        iqr = df.quantile(0.75, axis=axis) - df.quantile(0.25, axis=axis)
        return {
            "n": np.round(nn, round_factor),
            "median": np.round(med, round_factor),
            "iqr": np.round(iqr, round_factor),
        }


def _return_counting_process():
    """Start a child process that prints an incrementing counter every second.

    Returns
    -------
    multiprocessing.Process
        The already-started counting process (demo companion for
        :func:`wait_key`; terminate it via ``wait_key(p)``).
    """
    import multiprocessing

    def _count():
        counter = 0
        while True:
            print(counter)
            time.sleep(1)
            counter += 1

    p1 = multiprocessing.Process(target=_count)
    p1.start()
    return p1


def wait_key(process, tgt_key="q"):
    """Wait for a specific key press, then terminate the given process.

    Blocks reading single keystrokes (echoing each one) until ``tgt_key``
    is pressed, at which point ``process`` is terminated. Typically used to
    provide a way to interrupt a long-running background process.

    Parameters
    ----------
    process : multiprocessing.Process
        The process to terminate once the key is pressed.
    tgt_key : str, optional
        The target key to wait for (default is "q" for quit).

    Returns
    -------
    None

    Example
    -------
    >>> p1 = _return_counting_process()
    >>> wait_key(p1)  # press 'q' to stop the counter
    """
    # NOTE: the original had two back-to-back docstrings; the second was a
    # dead string statement and has been merged into the one above.
    pressed_key = None
    while pressed_key != tgt_key:
        pressed_key = readchar.readchar()
        print(pressed_key)
    process.terminate()


class ThreadWithReturnValue(threading.Thread):
    """Thread whose ``join()`` returns the target's return value.

    Example
    -------
    t = ThreadWithReturnValue(target=func, args=(arg,), kwargs={"key": val})
    t.start()
    out = t.join()
    """

    def __init__(
        self,
        group=None,
        target=None,
        name=None,
        args=(),
        kwargs=None,
        Verbose=None,
    ):
        # BUG FIX: the previous version used a mutable default (kwargs={})
        # and called Thread.__init__ positionally via the bare `Thread` name;
        # super().__init__ with keyword arguments is safer and equivalent
        # (Thread itself maps kwargs=None to {}). `Verbose` is kept, unused,
        # for backward compatibility with existing callers.
        super().__init__(
            group=group, target=target, name=name, args=args, kwargs=kwargs
        )
        self._return = None

    def run(self):
        # Capture the target's return value so join() can hand it back.
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args):
        # Delegate to Thread.join (optionally with a timeout), then surface
        # the captured return value. Returns None if the target has not
        # finished (e.g. join timed out) or returned None.
        threading.Thread.join(self, *args)
        return self._return


def unique(data, axis=None):
    """Identify unique elements in the data and their counts.

    NOTE(review): the original module defined ``unique`` twice; the first
    definition was dead code (immediately shadowed by this one) and has been
    removed. This is the implementation callers actually got.

    Parameters
    ----------
    data : array-like
        The input data to analyze for unique elements.
    axis : int, optional
        The axis along which to find the unique elements. Defaults to None
        (flattened input).

    Returns
    -------
    pandas.DataFrame
        Unique elements plus a 'Counts' column. Counts are formatted with
        thousands separators (e.g. ``'1,234'``), i.e. they are strings.
    """
    if axis is None:
        uqs, counts = np.unique(data, return_counts=True)
        df = pd.DataFrame({"Unique Elements": uqs, "Counts": counts})
    else:
        uqs, counts = np.unique(data, axis=axis, return_counts=True)
        # One column per element of the unique rows/slices along `axis`.
        df = pd.DataFrame(
            uqs,
            columns=[f"Unique Elements Axis {i}" for i in range(uqs.shape[1])],
        )
        df["Counts"] = counts

    # Human-readable counts: 1234 -> '1,234' (converts the column to str).
    df["Counts"] = df["Counts"].apply(lambda x: f"{x:,}")

    return df


def uq(*args, **kwargs):
    """Alias for :func:`unique`.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to :func:`unique`.

    Returns
    -------
    pandas.DataFrame
        Whatever :func:`unique` returns for the given arguments.
    """
    return unique(*args, **kwargs)


def float_linspace(start, stop, num_points):
    """Generate evenly spaced floating-point numbers over an interval.

    Similar to numpy's linspace: ``num_points`` values from ``start`` to
    ``stop`` inclusive.

    Parameters
    ----------
    start : float
        The starting value of the sequence.
    stop : float
        The end value of the sequence.
    num_points : int
        Number of points to generate.

    Returns
    -------
    numpy.ndarray
        Array of evenly spaced values. For ``num_points < 2`` a
        single-element array ``[start]`` is returned.

    Example
    -------
    >>> float_linspace(0, 1, 5)
    array([0.  , 0.25, 0.5 , 0.75, 1.  ])
    >>> float_linspace(1, 2, 3)
    array([1. , 1.5, 2. ])
    """
    num_points = int(num_points)  # tolerate float-valued counts

    # BUG FIX: the old guard returned `[start, stop] if num_points == 2` from
    # inside an `if num_points < 2:` branch -- that condition was unreachable.
    # For fewer than two points the result was (and remains) just [start].
    if num_points < 2:
        return np.array([start])

    step = (stop - start) / (num_points - 1)
    return np.array([start + i * step for i in range(num_points)])


# EOF
# (diff residue preserved for the record: the patch also deleted the empty
# file src/scitex/gen/path.py, old mode 100755, blob e69de29b.)