Skip to content

Commit 20b3ffc

Browse files
committed
Chore: update copyright year
Signed-off-by: Dasun Abeykoon <Dasun20202020@gmail.com>
1 parent 1637ed5 commit 20b3ffc

File tree

6 files changed

+108
-51
lines changed

6 files changed

+108
-51
lines changed

src/diffpy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
##############################################################################
33
#
4-
# (c) 2025 The Trustees of Columbia University in the City of New York.
4+
# (c) 2024-2025 The Trustees of Columbia University in the City of New York.
55
# All rights reserved.
66
#
77
# File coded by: Billinge Group members and community contributors.

src/diffpy/nmf_mapping/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
##############################################################################
33
#
4-
# (c) 2025 The Trustees of Columbia University in the City of New York.
4+
# (c) 2024-2025 The Trustees of Columbia University in the City of New York.
55
# All rights reserved.
66
#
77
# File coded by: Billinge Group members and community contributors.

src/diffpy/nmf_mapping/main.py

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ def boolean_string(s):
2020

2121
def main(args=None):
2222
"""Parses directory argument supplied by user and conducts NMF
23-
decomposition analysis (computes NMF decomposition and shows the weights
24-
over time)."""
23+
decomposition analysis (computes NMF decomposition and shows the
24+
weights over time)."""
2525

2626
_BANNER = """
2727
This is a package which takes a directory of 1D diffraction files
@@ -38,13 +38,17 @@ def main(args=None):
3838

3939
def tup(s):
4040
if not isinstance(s, str):
41-
raise TypeError("Input must be a string of two integers separated by a comma.")
41+
raise TypeError(
42+
"Input must be a string of two integers separated by a comma."
43+
)
4244

4345
try:
4446
l, h = map(int, s.split(","))
4547
return l, h
4648
except ValueError:
47-
raise ValueError("Input must be two integers separated by a comma (e.g., '1,5')")
49+
raise ValueError(
50+
"Input must be two integers separated by a comma (e.g., '1,5')"
51+
)
4852

4953
# args
5054
parser.add_argument(
@@ -94,15 +98,19 @@ def tup(s):
9498
"--xrd",
9599
default=False,
96100
type=boolean_string,
97-
help="whether to look for .xy files rather than .gr files\n" "default: False\n" "e.g. --xrd True",
101+
help="whether to look for .xy files rather than .gr files\n"
102+
"default: False\n"
103+
"e.g. --xrd True",
98104
)
99105
parser.add_argument(
100106
"--x_units",
101107
default=None,
102108
type=str,
103109
choices=["twotheta", "q"],
104110
required="--xrd" in sys.argv,
105-
help="x axis units for XRD data\n" "default: None\n" "e.g. --x_units twotheta",
111+
help="x axis units for XRD data\n"
112+
"default: None\n"
113+
"e.g. --x_units twotheta",
106114
)
107115
parser.add_argument(
108116
"--xrange",
@@ -152,7 +160,9 @@ def tup(s):
152160

153161
print(f"Number of components: {len(df_components.columns)}")
154162

155-
fig1 = nmf.component_plot(df_components, args1.xrd, args1.x_units, args1.show)
163+
fig1 = nmf.component_plot(
164+
df_components, args1.xrd, args1.x_units, args1.show
165+
)
156166
fig2 = nmf.component_ratio_plot(df_component_weight_timeseries, args1.show)
157167
fig3 = nmf.reconstruction_error_plot(df_reconstruction_error, args1.show)
158168
if args1.pca_thresh:
@@ -161,10 +171,18 @@ def tup(s):
161171
if args1.save_files:
162172
if not os.path.exists(os.path.join(os.getcwd(), "nmf_result")):
163173
os.mkdir(os.path.join(os.getcwd(), "nmf_result"))
164-
output_fn = datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S%f")
165-
df_components.to_json(os.path.join(os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json"))
174+
output_fn = datetime.fromtimestamp(time.time()).strftime(
175+
"%Y%m%d%H%M%S%f"
176+
)
177+
df_components.to_json(
178+
os.path.join(
179+
os.getcwd(), "nmf_result", "x_index_vs_y_col_components.json"
180+
)
181+
)
166182
df_component_weight_timeseries.to_json(
167-
os.path.join(os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json")
183+
os.path.join(
184+
os.getcwd(), "nmf_result", "component_index_vs_pratio_col.json"
185+
)
168186
)
169187
df_component_weight_timeseries.to_csv(
170188
os.path.join(
@@ -178,27 +196,47 @@ def tup(s):
178196
mode="a",
179197
)
180198
df_reconstruction_error.to_json(
181-
os.path.join(os.getcwd(), "nmf_result", "component_index_vs_RE_value.json")
199+
os.path.join(
200+
os.getcwd(), "nmf_result", "component_index_vs_RE_value.json"
201+
)
202+
)
203+
plot_file1 = os.path.join(
204+
os.getcwd(), "nmf_result", output_fn + "comp_plot.png"
205+
)
206+
plot_file2 = os.path.join(
207+
os.getcwd(), "nmf_result", output_fn + "ratio_plot.png"
208+
)
209+
plot_file3 = os.path.join(
210+
os.getcwd(), "nmf_result", output_fn + "loss_plot.png"
182211
)
183-
plot_file1 = os.path.join(os.getcwd(), "nmf_result", output_fn + "comp_plot.png")
184-
plot_file2 = os.path.join(os.getcwd(), "nmf_result", output_fn + "ratio_plot.png")
185-
plot_file3 = os.path.join(os.getcwd(), "nmf_result", output_fn + "loss_plot.png")
186212
if args1.pca_thresh:
187-
plot_file7 = os.path.join(os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png")
213+
plot_file7 = os.path.join(
214+
os.getcwd(), "nmf_result", output_fn + "pca_var_plot.png"
215+
)
188216
plot_file4 = os.path.splitext(plot_file1)[0] + ".pdf"
189217
plot_file5 = os.path.splitext(plot_file2)[0] + ".pdf"
190218
plot_file6 = os.path.splitext(plot_file3)[0] + ".pdf"
191219
if args1.pca_thresh:
192220
plot_file8 = os.path.splitext(plot_file7)[0] + ".pdf"
193-
txt_file = os.path.join(os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt")
221+
txt_file = os.path.join(
222+
os.getcwd(), "nmf_result", output_fn + "_meta" + ".txt"
223+
)
194224
with open(txt_file, "w+") as fi:
195225
fi.write("NMF Analysis\n\n")
196-
fi.write(f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n")
226+
fi.write(
227+
f"{len(df_component_weight_timeseries.columns)} files uploaded for analysis.\n\n"
228+
)
197229
fi.write(f"The selected active r ranges are: {args1.xrange} \n\n")
198230
fi.write("Thesholding:\n")
199-
fi.write(f"\tThe input component threshold was: {args1.threshold}\n")
200-
fi.write(f"\tThe input improvement threshold was: {args1.improve_thresh}\n")
201-
fi.write(f"\tThe input # of iterations to run was: {args1.n_iter}\n")
231+
fi.write(
232+
f"\tThe input component threshold was: {args1.threshold}\n"
233+
)
234+
fi.write(
235+
f"\tThe input improvement threshold was: {args1.improve_thresh}\n"
236+
)
237+
fi.write(
238+
f"\tThe input # of iterations to run was: {args1.n_iter}\n"
239+
)
202240
fi.write(f"\tWas PCA thresholding used?: {args1.pca_thresh}\n")
203241
fi.write(f"{len(df_components.columns)} components were extracted")
204242

@@ -214,7 +252,9 @@ def tup(s):
214252
fig4.savefig(plot_file8)
215253
columns = df_components.columns
216254
for i, col in enumerate(columns):
217-
data = np.column_stack([df_components.index.to_list(), df_components[col].to_list()])
255+
data = np.column_stack(
256+
[df_components.index.to_list(), df_components[col].to_list()]
257+
)
218258

219259
if args1.xrd:
220260
np.savetxt(
@@ -237,7 +277,8 @@ def tup(s):
237277
output_fn + f"_comp{i}" + ".cgr",
238278
),
239279
data,
240-
header=f"NMF Generated PDF\nSource: nmfMapping\n" f"Date: {output_fn}\nr g",
280+
header=f"NMF Generated PDF\nSource: nmfMapping\n"
281+
f"Date: {output_fn}\nr g",
241282
fmt="%s",
242283
)
243284

src/diffpy/nmf_mapping/nmf_mapping_code.py

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ def natural_keys_file_name(text):
3636

3737

3838
def load_data(dir, xrd=False):
39-
"""Takes a directory and selects out the .gr or .xy or .xye files. Loads
40-
these files into a 3D numpy array.
39+
"""Takes a directory and selects out the .gr or .xy or .xye files.
40+
Loads these files into a 3D numpy array.
4141
4242
Parameters
4343
----------
@@ -80,7 +80,9 @@ def load_data(dir, xrd=False):
8080
x = new_dat[:, 0]
8181
y = new_dat[:, 1]
8282
if len(x) != len(x_set) or not all(x == x_set):
83-
f = interpolate.interp1d(x, y, bounds_error=False, fill_value="extrapolate")
83+
f = interpolate.interp1d(
84+
x, y, bounds_error=False, fill_value="extrapolate"
85+
)
8486
data_arr[i][:, 1] = f(x_set)
8587
data_arr[i][:, 0] = x_set
8688
else:
@@ -106,10 +108,10 @@ def NMF_decomposition(
106108
pca_thresh=None,
107109
):
108110
"""Takes a 3D array of PDFs and returns the structurally significant
109-
components present in all of the PDFs (or XRD) provided in r vs gr format,
110-
as well as the ratio of each in the data list, as well as the
111-
reconstruction error found in the first 10 components regardless of the
112-
threshold.
111+
components present in all of the PDFs (or XRD) provided in r vs gr
112+
format, as well as the ratio of each in the data list, as well as
113+
the reconstruction error found in the first 10 components regardless
114+
of the threshold.
113115
114116
Parameters
115117
----------
@@ -150,7 +152,8 @@ def NMF_decomposition(
150152
else:
151153
df_list.append(
152154
x_vs_y_df_preprocess[
153-
(x_vs_y_df_preprocess.index >= x_low) & (x_vs_y_df_preprocess.index <= x_high)
155+
(x_vs_y_df_preprocess.index >= x_low)
156+
& (x_vs_y_df_preprocess.index <= x_high)
154157
]
155158
)
156159
x_vs_y_df = pd.concat(df_list)
@@ -173,7 +176,9 @@ def NMF_decomposition(
173176
pca.fit(x_vs_y_df.to_numpy().T)
174177
pca_number_components = len(pca.components_)
175178
pca_explained_variance = pca.explained_variance_ratio_
176-
df_explained_var_ratio = pd.DataFrame(pd.Series(pca_explained_variance))
179+
df_explained_var_ratio = pd.DataFrame(
180+
pd.Series(pca_explained_variance)
181+
)
177182
df_explained_var_ratio.index = df_explained_var_ratio.index + 1
178183
sweeping_grid = range(1, max_comp + 1, 1)
179184
for i in sweeping_grid:
@@ -186,7 +191,9 @@ def NMF_decomposition(
186191
if thresh is None:
187192
if improve_thresh is not None:
188193
if improve_thresh > 1 or improve_thresh < 0:
189-
raise ValueError("Invalid improvement threshold ratio. Must be between 0 and 1.")
194+
raise ValueError(
195+
"Invalid improvement threshold ratio. Must be between 0 and 1."
196+
)
190197
thresh = nmf_ncomp_selection(nmf_loss, rtol=improve_thresh)
191198
elif pca_thresh:
192199
thresh = pca_number_components
@@ -209,7 +216,9 @@ def NMF_decomposition(
209216
nmf_weight /= nmf_weight.sum(1)[:, np.newaxis]
210217
nmf_weight = nmf_weight.T
211218
nmf_weight = np.array([nmf_weight[s, :] for s in range(n_comp)])
212-
df_component_weight_timeseries = pd.DataFrame(nmf_weight, index=range(n_comp))
219+
df_component_weight_timeseries = pd.DataFrame(
220+
nmf_weight, index=range(n_comp)
221+
)
213222

214223
if pca_thresh:
215224
return (
@@ -226,8 +235,9 @@ def NMF_decomposition(
226235

227236

228237
def component_plot(df_components, xrd=False, x_units=None, show=True):
229-
"""Takes a dataframe containing the NMF components as columns and x index,
230-
Returns a matplotlib figure representing the constituent component plot.
238+
"""Takes a dataframe containing the NMF components as columns and x
239+
index. Returns a matplotlib figure representing the constituent
240+
component plot.
231241
232242
Parameters
233243
----------
@@ -280,10 +290,10 @@ def component_plot(df_components, xrd=False, x_units=None, show=True):
280290

281291

282292
def component_ratio_plot(df_component_weight_timeseries, show=True):
283-
"""Takes a pandas df with the index representing the components and the
284-
columns representing the different experiments, the values being the
285-
weight. Returns a matplotlib figure of the component ratio across the files
286-
provided.
293+
"""Takes a pandas df with the index representing the components and
294+
the columns representing the different experiments, the values being
295+
the weight. Returns a matplotlib figure of the component ratio
296+
across the files provided.
287297
288298
Parameters
289299
----------
@@ -316,9 +326,9 @@ def component_ratio_plot(df_component_weight_timeseries, show=True):
316326

317327

318328
def reconstruction_error_plot(df_reconstruction_error, show=True):
319-
"""Takes a pandas df with one column representing the reconstruction error
320-
and an index of the phase component. Returns a matplotlib figure of the
321-
reconstruction error plot.
329+
"""Takes a pandas df with one column representing the reconstruction
330+
error and an index of the phase component. Returns a matplotlib
331+
figure of the reconstruction error plot.
322332
323333
Parameters
324334
----------
@@ -353,9 +363,9 @@ def reconstruction_error_plot(df_reconstruction_error, show=True):
353363

354364

355365
def explained_variance_plot(df_explained_var_ratio, show=True):
356-
"""Takes a pandas df with one column representing the reconstruction error
357-
and an index of the phase component. Returns a matplotlib figure of the
358-
reconstruction error plot.
366+
"""Takes a pandas df with one column representing the explained
367+
variance ratio and an index of the PCA component. Returns a
368+
matplotlib figure of the explained variance plot.
359369
360370
Parameters
361371
----------
@@ -407,10 +417,14 @@ def nmf_ncomp_selection(loss, rtol=None):
407417
rtol = 1e-2
408418
(inds,) = np.where(imp_ratio <= rtol)
409419
if not list(inds):
410-
print("Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff")
420+
print(
421+
"Improvement ratio of 1E-2 not met. Inspect data and impose manual cutoff"
422+
)
411423
len(loss)
412424
return starting_len
413425
if not list(inds):
414-
print(f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff")
426+
print(
427+
f"Improvement ratio of {rtol} not met. Inspect data and impose manual cutoff"
428+
)
415429
return starting_len
416430
return inds[0] + 1

src/diffpy/nmf_mapping/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
##############################################################################
33
#
4-
# (c) 2025 The Trustees of Columbia University in the City of New York.
4+
# (c) 2024-2025 The Trustees of Columbia University in the City of New York.
55
# All rights reserved.
66
#
77
# File coded by: Billinge Group members and community contributors.

src/diffpy/version.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
__version__ = get_distribution("diffpy.nmf_mapping").version
2424

2525
# we assume that tag_date was used and __version__ ends in YYYYMMDD
26-
__date__ = __version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:]
26+
__date__ = (
27+
__version__[-8:-4] + "-" + __version__[-4:-2] + "-" + __version__[-2:]
28+
)
2729

2830
# End of file

0 commit comments

Comments
 (0)