From 5ca2d95bafeab9e967769988c18124a3d0f584c0 Mon Sep 17 00:00:00 2001 From: Jonas Schwab Date: Tue, 21 May 2024 15:23:28 +0200 Subject: [PATCH 1/2] Write analysis results additionally to HDF5-file results.h5 --- py_alf/ana.py | 189 ++++++++++++++++++++++++++++++++++--- py_alf/analysis.py | 217 +++++++++++++++++++++++-------------------- py_alf/exceptions.py | 3 + py_alf/lattice.py | 38 ++++++++ 4 files changed, 335 insertions(+), 112 deletions(-) diff --git a/py_alf/ana.py b/py_alf/ana.py index 259b501..e3c3152 100644 --- a/py_alf/ana.py +++ b/py_alf/ana.py @@ -3,6 +3,8 @@ # pylint: disable=too-many-branches # pylint: disable=missing-function-docstring # pylint: disable=unbalanced-tuple-unpacking +# pylint: disable=protected-access +# pylint: disable=too-many-lines import os import pickle @@ -17,6 +19,145 @@ from .lattice import Lattice, UnitCell +class AnalysisResults: + """ + Storage for analysis restults of an observable. + + Parameters + ---------- + name : str + Observable name + sign : float + Average QMC sign + sign_err : float + Error of average QMC sign + description : str, optional + Observable description + """ + _values = {} + + def __init__(self, name, sign, sign_err, description=None): + self.name = name + self.description = description + + self._add_value('sign', sign, sign_err, 'Average QMC sign') + # setattr(self, 'results_dict', results_dict) + + + class _Value: + # pylint: disable=too-few-public-methods + def __init__(self, name, value, err=None, description=None): + self.name = name + self.value = value + self.error = err + self.description = description + + def write_to_hdf5(self, hdf5_loc): + grp = hdf5_loc.create_group(self.name) + grp.create_dataset('value', data=self.value) + if self.error is not None: + grp.create_dataset('error', data=self.error) + if self.description: + grp.attrs['description'] = self.description + + def _add_value(self, name, value, err=None, description=None): + self._values[name] = self._Value( + name, value, err, description) + + def write_to_hdf5(self, filename): + with h5py.File(filename, "a") as f: + grp = f.create_group(self.name) + if self.description: + grp.attrs['description'] = self.description + for val in self._values.values(): + val.write_to_hdf5(grp) + + def append_to_flat_dict(self, dic): + dic[self.name+'_sign'] = self._values['sign'].value + dic[self.name+'_sign_err'] = self._values['sign'].error + + + +class AnalysisResultsScal(AnalysisResults): + """ + Storage for analysis restults of a scalar type observable. + + Parameters + ---------- + name : str + Observable name + name : description + Observable description + sign : float + Average QMC sign + sign_err : float + Error of average QMC sign + val : array + Observable value(s) + err : array + Observable error(s) + """ + + def __init__(self, name, sign, sign_err, val, err, description=None): + # pylint: disable=too-many-arguments + super().__init__(name, sign, sign_err, description) + + self._add_value('val', val, err, 'Observable value(s)') + + def append_to_flat_dict(self, dic): + super().append_to_flat_dict(dic) + dat = self._values['val'].value + err = self._values['val'].error + # for i in range(len(dat)): + for i, (d, e) in enumerate(zip(dat, err)): + dic[self.name+str(i)] = d + dic[self.name+str(i)+'_err'] = e + + def savetxt(self, fname): + np.savetxt( + fname, + np.column_stack([self._values['val'].value, + self._values['val'].error]), + header='Sign: {} {}'.format( + self._values['sign'].value, + self._values['sign'].error + ) + ) + + +class AnalysisResultsLatt(AnalysisResults): + """ + Storage for analysis restults of a Lattice type observable. + + Parameters + ---------- + name : str + Observable name + name : description + Observable description + sign : float + Average QMC sign + sign_err : float + Error of average QMC sign + val : array + Observable value(s) + err : array + Observable error(s) + """ + + def __init__(self, name, sign, sign_err, latt, description=None): + # pylint: disable=too-many-arguments + super().__init__(name, sign, sign_err, description) + + self.latt = latt + + def write_to_hdf5(self, filename): + super().write_to_hdf5(filename) + with h5py.File(filename, "a") as f: + grp = f[self.name] + self.latt.write_to_hdf5(grp) + + def symmetrize(latt, syms, dat): """Symmetrize a dataset. @@ -604,7 +745,7 @@ def ana_scal(directory, obs_name): J = J_obs[:, n] / J_sign dat[n, :] = error(J) - return sign, dat + return AnalysisResultsScal(obs_name, sign[0], sign[1], dat[:, 0], dat[:, 1]) def ana_hist(directory, obs_name): @@ -620,12 +761,25 @@ def ana_hist(directory, obs_name): below = error(J_below) d_class = (upper-lower)/N_classes - dat = np.empty((N_classes, 3)) + dat = np.empty((N_classes, 2)) + err = np.empty((N_classes,)) for n in range(N_classes): J = J_obs[:, n] / J_sign - dat[n, :] = [lower+d_class*(0.5+n), *error(J)] + m, e = error(J) + dat[n, 0] = lower+d_class*(0.5+n) + dat[n, 1] = m + err[n] = e - return sign, above, below, dat, upper, lower + results = AnalysisResults(obs_name, sign[0], sign[1]) + results._add_value('dat', dat, err, 'Histogram data') + results._add_value('above', above[0], above[1], + 'Fraction of counts above histogram range') + results._add_value('below', below[0], below[1], + 'Fraction of counts below histogram range') + results._add_value('upper', upper, description='Upper histogram bound') + results._add_value('lower', lower, description='Lower histogram bound') + + return results def ana_eq(directory, obs_name, sym=None): @@ -645,7 +799,8 @@ def ana_eq(directory, obs_name, sym=None): J_obs = J_obs.reshape((N_bins, N_orb, N_orb, latt.N)) m, e = error(J_sign) - sign = (m, e) + + results = AnalysisResultsLatt(obs_name, m, e, latt) J = np.array([J_obs[n] / J_sign[n] for n in range(N_bins)]) @@ -653,17 +808,23 @@ def ana_eq(directory, obs_name, sym=None): J = symmetrize(latt, sym, J) m_K, e_K = error(J) + results._add_value('k', m_K, e_K, 'Momentum-resolved results') J_sum = J.trace(axis1=1, axis2=2) m_sum, e_sum = error(J_sum) + results._add_value('k_traced', m_sum, e_sum, + 'Momentum-resolved results. ' + 'Traced over orbital degrees of freedom') - J_R = latt.fourier_K_to_R(J) - m_R, e_R = error(J_R) + m_R, e_R = error(latt.fourier_K_to_R(J)) + results._add_value('r', m_R, e_R, 'Real-space results') - J_R_sum = latt.fourier_K_to_R(J_sum) - m_R_sum, e_R_sum = error(J_R_sum) + m_R_sum, e_R_sum = error(latt.fourier_K_to_R(J_sum)) + results._add_value('r_traced', m_R_sum, e_R_sum, + 'Momentum-resolved results. ' + 'Traced over orbital degrees of freedom') - return sign, m_K, e_K, m_sum, e_sum, m_R, e_R, m_R_sum, e_R_sum, latt + return results def ana_tau(directory, obs_name, sym=None): @@ -681,7 +842,9 @@ def ana_tau(directory, obs_name, sym=None): N_bins = len(J_sign) m, e = error(J_sign) - sign = (m, e) + + results = AnalysisResultsLatt(obs_name, m, e, latt) + results._add_value('dtau', dtau, description='Imaginary time step') J = np.array( [J_obs[n].trace(axis1=0, axis2=1) / J_sign[n] for n in range(N_bins)]) @@ -690,12 +853,14 @@ def ana_tau(directory, obs_name, sym=None): J = symmetrize(latt, sym, J) m_K, e_K = error(J) + results._add_value('k', m_K, e_K, 'Momentum-resolved results') # Fourier transform J_R = latt.fourier_K_to_R(J) m_R, e_R = error(J_R) + results._add_value('r', m_R, e_R, 'Real-space results') - return sign, m_K, e_K, m_R, e_R, dtau, latt + return results def write_res_eq(directory, obs_name, diff --git a/py_alf/analysis.py b/py_alf/analysis.py index 3623c6f..a1c7574 100644 --- a/py_alf/analysis.py +++ b/py_alf/analysis.py @@ -1,5 +1,6 @@ """Supplies the default analysis routine.""" # pylint: disable=invalid-name +# pylint: disable=protected-access import os import pickle @@ -8,6 +9,7 @@ import numpy as np from .ana import ( + AnalysisResultsScal, Parameters, ReadObs, ana_eq, @@ -19,50 +21,12 @@ write_res_eq, write_res_tau, ) -from .exceptions import TooFewBinsError +from .exceptions import AlreadyAnalyzed, TooFewBinsError -def analysis(directory, - symmetry=None, custom_obs=None, do_tau=True, always=False): - """Perform analysis in the given directory. - - Results are written to the pickled dictionary `res.pkl` and in plain text - in the folder `res/`. - - Parameters - ---------- - directory : path-like object - Directory containing Monte Carlo bins. - symmetry : list of functions, optional - List of functions reppresenting symmetry operations on lattice, - including unity. It is used to symmetrize lattice-type - observables. - custom_obs : dict, default=None - Defines additional observables derived from existing observables. - The key of each entry is the observable name and the value is a - dictionary with the format:: - - {'needs': some_list, - 'kwargs': some_dict, - 'function': some_function,} - - `some_list` contains observable names to be read by - :class:`py_alf.ana.ReadObs`. Jackknife bins and kwargs from - `some_dict` are handed to `some_function` with a separate call for - each bin. - do_tau : bool, default=True - Analyze time-displaced correlation functions. Setting this to False - speeds up analysis and makes result files much smaller. - always : bool, default=False - Do not skip if parameters and bins are older than results. - - """ - # pylint: disable=too-many-locals +def _get_obs_list(directory, always=False): # pylint: disable=too-many-branches # pylint: disable=too-many-statements - print(f'### Analyzing {directory} ###') - print(os.getcwd()) - par = Parameters(directory) if 'data.h5' in os.listdir(directory): if not always: @@ -73,11 +37,10 @@ def analysis(directory, - os.path.getmtime(os.path.join(directory, 'res.pkl')) if d1 < 0 and d2 < 0: print('already analyzed') - return + raise AlreadyAnalyzed except OSError: pass - # pylint: disable=no-member with h5py.File(os.path.join(directory, 'data.h5'), "r") as f: params = {} for name in f['parameters']: @@ -109,7 +72,7 @@ def analysis(directory, if N_bins < par.N_min(): print('too few bins ', N_bins) - return + raise TooFewBinsError else: params = {} # Stays empty, parameters are only supported with HDF5 list_obs = [] @@ -130,11 +93,63 @@ def analysis(directory, elif o.endswith('_tau'): list_obs.append(o) list_tau.append(o) + return params, list_obs, list_scal, list_hist, list_eq, list_tau + + + +def analysis(directory, + symmetry=None, custom_obs=None, do_tau=True, always=False): + """ + Perform analysis in the given directory. + + Results are written to the pickled dictionary `res.pkl` and in plain text + in the folder `res/`. + + Parameters + ---------- + directory : path-like object + Directory containing Monte Carlo bins. + symmetry : list of functions, optional + List of functions representing symmetry operations on lattice, + including unity. It is used to symmetrize lattice-type + observables. + custom_obs : dict, default=None + Defines additional observables derived from existing observables. + The key of each entry is the observable name and the value is a + dictionary with the format:: + + {'needs': some_list, + 'kwargs': some_dict, + 'function': some_function,} + + `some_list` contains observable names to be read by + :class:`py_alf.ana.ReadObs`. Jackknife bins and kwargs from + `some_dict` are handed to `some_function` with a separate call for + each bin. + do_tau : bool, default=True + Analyze time-displaced correlation functions. Setting this to False + speeds up analysis and makes result files much smaller. + always : bool, default=False + Do not skip if parameters and bins are older than results. + """ + # pylint: disable=too-many-locals + # pylint: disable=too-many-branches + # pylint: disable=too-many-statements + print(f'### Analyzing {directory} ###') + print(os.getcwd()) + + try: + dic, list_obs, list_scal, list_hist, list_eq, list_tau = \ + _get_obs_list(directory, always) + except (AlreadyAnalyzed, TooFewBinsError): + return if 'res' not in os.listdir(directory): os.mkdir(os.path.join(directory, 'res')) - dic = params + results_file = os.path.join(directory, 'results.h5') + with h5py.File(results_file, "w") as f: + f.attrs.create('program_name', 'ALF') if custom_obs is not None: print("Custom observables:") @@ -154,6 +169,10 @@ def analysis(directory, **obs_spec['kwargs']) dat = error(J) + results = AnalysisResultsScal( + "obs_name", np.nan, np.nan, dat[0], dat[1] + ) + results.write_to_hdf5(results_file) dic[obs_name] = dat[0] dic[obs_name+'_err'] = dat[1] @@ -167,75 +186,68 @@ def analysis(directory, for obs_name in list_scal: print(obs_name) try: - sign, dat = ana_scal(directory, obs_name) + results = ana_scal(directory, obs_name) except TooFewBinsError: print("Too few bins, skipping.") continue - dic[obs_name+'_sign'] = sign[0] - dic[obs_name+'_sign_err'] = sign[1] - for i in range(len(dat)): - dic[obs_name+str(i)] = dat[i, 0] - dic[obs_name+str(i)+'_err'] = dat[i, 1] - - np.savetxt( - os.path.join(directory, 'res', obs_name), - dat, - header='Sign: {} {}'.format(*sign) - ) + results.write_to_hdf5(results_file) + results.append_to_flat_dict(dic) + results.savetxt(os.path.join(directory, 'res', obs_name)) print("Histogram observables:") for obs_name in list_hist: print(obs_name) try: - sign, above, below, dat, upper, lower = ana_hist(directory, obs_name) + results = ana_hist(directory, obs_name) except TooFewBinsError: print("Too few bins, skipping.") continue - - hist = {} - hist['dat'] = dat - hist['sign'] = sign - hist['above'] = above - hist['below'] = below - hist['upper'] = upper - hist['lower'] = lower - dic[obs_name] = hist + dic[obs_name] = results np.savetxt( os.path.join(directory, 'res', obs_name), - dat, + np.column_stack([results._values['dat'].value, + results._values['dat'].error]), header='Sign: {} {}, above {} {}, below {} {}'.format( - *sign, *above, *below) + results._values['sign'].value, results._values['sign'].error, + results._values['above'].value, results._values['above'].error, + results._values['below'].value, results._values['below'].error + ) ) print("Equal time observables:") for obs_name in list_eq: print(obs_name) try: - sign, m_k, e_k, m_k_sum, e_k_sum, m_r, e_r, m_r_sum, e_r_sum, latt = \ - ana_eq(directory, obs_name, sym=symmetry) + results = ana_eq(directory, obs_name, sym=symmetry) except TooFewBinsError: print("Too few bins, skipping.") continue + results.write_to_hdf5(results_file) + + write_res_eq( + directory, obs_name, + results._values['k'].value, results._values['k'].error, + results._values['k_traced'].value, results._values['k_traced'].error, + results._values['r'].value, results._values['r'].error, + results._values['r_traced'].value, results._values['r_traced'].error, + results.latt + ) - write_res_eq(directory, obs_name, - m_k, e_k, m_k_sum, e_k_sum, - m_r, e_r, m_r_sum, e_r_sum, latt) - - dic[obs_name+'K'] = m_k - dic[obs_name+'K_err'] = e_k - dic[obs_name+'K_sum'] = m_k_sum - dic[obs_name+'K_sum_err'] = e_k_sum - dic[obs_name+'R'] = m_r - dic[obs_name+'R_err'] = e_r - dic[obs_name+'R_sum'] = m_r_sum - dic[obs_name+'R_sum_err'] = e_r_sum + dic[obs_name+'K'] = results._values['k'].value + dic[obs_name+'K_err'] = results._values['k'].error + dic[obs_name+'K_sum'] = results._values['k_traced'].value + dic[obs_name+'K_sum_err'] = results._values['k_traced'].error + dic[obs_name+'R'] = results._values['r'].value + dic[obs_name+'R_err'] = results._values['r'].error + dic[obs_name+'R_sum'] = results._values['r_traced'].value + dic[obs_name+'R_sum_err'] = results._values['r_traced'].error dic[obs_name+'_lattice'] = { - 'L1': latt.L1, - 'L2': latt.L2, - 'a1': latt.a1, - 'a2': latt.a2 + 'L1': results.latt.L1, + 'L2': results.latt.L2, + 'a1': results.latt.a1, + 'a2': results.latt.a2 } if do_tau: @@ -243,24 +255,29 @@ def analysis(directory, for obs_name in list_tau: print(obs_name) try: - sign, m_k, e_k, m_r, e_r, dtau, latt = \ - ana_tau(directory, obs_name, sym=symmetry) + results = ana_tau(directory, obs_name, sym=symmetry) except TooFewBinsError: print("Too few bins, skipping.") continue - - write_res_tau(directory, obs_name, - m_k, e_k, m_r, e_r, dtau, latt) - - dic[obs_name+'K'] = m_k - dic[obs_name+'K_err'] = e_k - dic[obs_name+'R'] = m_r - dic[obs_name+'R_err'] = e_r + results.write_to_hdf5(results_file) + + write_res_tau( + directory, obs_name, + results._values['k'].value, results._values['k'].error, + results._values['r'].value, results._values['r'].error, + results._values['dtau'].value, + results.latt, + ) + + dic[obs_name+'K'] = results._values['k'].value + dic[obs_name+'K_err'] = results._values['k'].error + dic[obs_name+'R'] = results._values['r'].value + dic[obs_name+'R_err'] = results._values['r'].error dic[obs_name+'_lattice'] = { - 'L1': latt.L1, - 'L2': latt.L2, - 'a1': latt.a1, - 'a2': latt.a2 + 'L1': results.latt.L1, + 'L2': results.latt.L2, + 'a1': results.latt.a1, + 'a2': results.latt.a2 } with open(os.path.join(directory, 'res.pkl'), 'wb') as f: diff --git a/py_alf/exceptions.py b/py_alf/exceptions.py index 56016eb..469de95 100644 --- a/py_alf/exceptions.py +++ b/py_alf/exceptions.py @@ -2,3 +2,6 @@ class TooFewBinsError(Exception): """Triggered when observable has too few bins for analysis.""" + +class AlreadyAnalyzed(Exception): + """Triggered when parameters and bins are older than analysis results.""" diff --git a/py_alf/lattice.py b/py_alf/lattice.py index 33ce44b..9991024 100644 --- a/py_alf/lattice.py +++ b/py_alf/lattice.py @@ -48,6 +48,24 @@ def __init__(self, latt_grp): # latt_grp.attrs["N_coord"] = latt_grp.attrs["Norb"] # latt_grp.attrs["Norb"] = self.Norb + def write_to_hdf5(self, latt_grp): + """Write unit cell info to HDF5-group.""" + dset = latt_grp.create_dataset( + 'orbital_positions', data=self.orbital_positions) + dset.attrs['description'] = 'Orbital positions within a unit cell' + + dset = latt_grp.create_dataset( + 'Norb', data=self.Norb) + dset.attrs['description'] = 'Number of orbitals per unit cell' + + dset = latt_grp.create_dataset( + 'Ndim', data=self.Ndim) + dset.attrs['description'] = 'Dimensionality of lattice' + + dset = latt_grp.create_dataset( + 'N_coord', data=self.N_coord) + dset.attrs['description'] = 'Coordination number' + class Lattice: """Finite size Bravais lattice object. @@ -269,6 +287,26 @@ def plot_k(self, data): ax.set_xlabel(r'$k_x$') ax.set_ylabel(r'$k_y$') + def write_to_hdf5(self, hdf5_loc): + """Create the HDF5-group 'lattice' and write lattice info.""" + grp = hdf5_loc.create_group('lattice') + + dset = grp.create_dataset('L1', data=self.L1) + dset.attrs['description'] = 'Periodic boundary condition 1' + dset = grp.create_dataset('L2', data=self.L2) + dset.attrs['description'] = 'Periodic boundary condition 2' + dset = grp.create_dataset('a1', data=self.a1) + dset.attrs['description'] = 'Primitive lattice vector 1' + dset = grp.create_dataset('a2', data=self.a2) + dset.attrs['description'] = 'Primitive lattice vector 2' + + dset = grp.create_dataset('k', data=self.k) + dset.attrs['description'] = 'k-space coordinates' + dset = grp.create_dataset('r', data=self.r) + dset.attrs['description'] = 'Real space coordinates' + + self.unit_cell.write_to_hdf5(grp) + def _plot_2d(coords, vec1, vec2, ax, data, cmap): # pylint: disable=too-many-arguments # pylint: disable=import-outside-toplevel From 3a8013d2e9687af332d39b8e6e3f72caf13240a6 Mon Sep 17 00:00:00 2001 From: Jonas Schwab Date: Fri, 10 Jan 2025 16:15:18 +0100 Subject: [PATCH 2/2] Minor typo --- py_alf/ana.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py_alf/ana.py b/py_alf/ana.py index e3c3152..da33073 100644 --- a/py_alf/ana.py +++ b/py_alf/ana.py @@ -21,7 +21,7 @@ class AnalysisResults: """ - Storage for analysis restults of an observable. + Storage for analysis results of an observable. Parameters ----------