-
Notifications
You must be signed in to change notification settings - Fork 2
Feature/np core 20241215 #55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
61b93da
6c80f0a
2be12b4
e143eb3
32ce2bd
ed0eaab
6c32072
28f84e8
b344e9f
5ed8cfc
a92722a
515e71f
6efbca6
ef34cde
aa09c1a
d3649c2
e3bee67
b466381
8249d9f
7fb5c56
d2226ac
a1e54c2
f336493
cb9ea29
6ae5c5a
fbb6a0e
a70f1a8
bb13e14
4579aff
a5f7fb1
68fa194
313a061
e917fe2
37b624e
9e078ce
7976a4d
f754653
26fbb55
bd1436f
b460ebd
1789551
fdd7aaf
4af2f14
39ebd15
c0b4664
b935a24
69709f8
62c0a52
2239d29
413684a
1b5cf9c
cb6ac1a
8977c4d
f96e1b3
b4fb4d8
3594dff
0dbd4ae
7b8e6bf
563b402
e9e1715
07c6743
da8e93b
a94d9e7
164cc6d
0cdf49a
3b2b5cb
cb9934e
4e119a4
90eb4b5
5b0286f
5baafe0
49e11e3
b39b4b1
b76f168
9920404
945cf8e
07f85f0
dbb36da
b281a16
0b64813
7f93e35
b8bdf08
85d1def
e325127
c1c93d1
21fd963
a47b87e
7723314
27e9e38
2f938f6
03a4a96
74c2992
26a94d8
fb73a0a
d1a2720
d54e3f0
44a773d
6b06a52
a0d1032
8a56e33
98c6cf4
7fa8339
3fde96f
64845ec
4745be9
fa19a80
54b88a5
3d4a4f5
2ffa0c5
688a081
64f8149
f7c16c3
2fe0bdc
9a0f888
b51b48d
0e1edae
c065163
21d4264
929e211
86f5f78
daac8f2
88fa6f5
12dbfb3
166aab4
a822d12
e3c86c1
c4a8fad
1f295bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| from collections import Counter | ||
|
|
||
| import numpy as np | ||
|
|
||
| from .. import DistributionMode | ||
|
|
||
|
|
||
class AlignmentCounter:
    """Accumulate per-reference alignment counts in a growable NumPy array.

    Each distinct key is assigned a row in ``self.counts`` via ``self.index``
    (key -> row number). Every row has two columns; ``update_counts`` adds to
    column ``int(ambiguous_counts)``, i.e. column 0 when ``ambiguous_counts``
    is false and column 1 when it is true.
    """

    COUNT_HEADER_ELEMENTS = ("raw", "lnorm", "scaled")
    # Number of rows allocated up front, and added each time the array is full.
    INITIAL_SIZE = 1000

    def __init__(self, distribution_mode=DistributionMode.ONE_OVER_N, strand_specific=False):
        """Create an empty counter.

        Args:
            distribution_mode: How an increment is distributed over multiple
                alignments; only ``DistributionMode.ONE_OVER_N`` changes the
                increment (see ``get_increment``).
            strand_specific: If true, counts are keyed by
                ``(hit.rid, hit.rev_strand)`` instead of ``hit.rid`` alone.
        """
        self.distribution_mode = distribution_mode
        self.strand_specific = strand_specific
        self.unannotated_reads = 0

        # key -> row number in self.counts
        self.index = {}
        self.counts = np.zeros(
            (AlignmentCounter.INITIAL_SIZE, 2),
        )

    @staticmethod
    def normalise_counts(counts, feature_len, scaling_factor):
        """Return raw, length-normalised, and scaled feature counts.

        Args:
            counts: Raw count(s).
            feature_len: Length to divide the counts by.
            scaling_factor: Factor applied to the length-normalised counts.

        Returns:
            Tuple ``(counts, counts / feature_len,
            counts / feature_len * scaling_factor)``.
        """
        normalised = counts / feature_len
        scaled = normalised * scaling_factor
        return counts, normalised, scaled

    def get_increment(self, n_aln, increment):
        """Return the increment to apply for a read with ``n_aln`` alignments.

        In ``DistributionMode.ONE_OVER_N`` mode the increment is divided by
        ``n_aln``; in every other mode it is returned unchanged.
        """
        # 1overN = lavern. Maya <3
        if self.distribution_mode == DistributionMode.ONE_OVER_N:
            return increment / n_aln
        return increment

    def dump(self, prefix, refmgr):
        # NOTE(review): no implementation is visible for this method here.
        ...

    def get(self, key, default_val):
        """Return a ``Counter`` holding the count row stored for ``key``.

        Returns an empty ``Counter`` when ``key`` is unknown.

        NOTE(review): ``default_val`` is accepted but never used; confirm
        whether callers expect it to be returned for unknown keys.
        """
        key_index = self.index.get(key)
        if key_index is None:
            return Counter()
        return Counter({key: self.counts[key_index]})

    def setdefault(self, key, default_val):
        # NOTE(review): no implementation is visible for this method here.
        ...

    def has_ambig_counts(self):
        """Return True if column 1 of the count array sums to a non-zero value."""
        return bool(self.counts[:, 1].sum() != 0)

    def __iter__(self):
        """Iterate over all keys that have been registered so far."""
        yield from self.index

    def __getitem__(self, key):
        """Return the count row for ``key``, or ``0.0`` if ``key`` is unknown."""
        key_index = self.index.get(key)
        if key_index is None:
            return 0.0
        return self.counts[key_index]

    def __setitem__(self, key, value):
        """Overwrite the count row of an already registered ``key``.

        Raises:
            KeyError: If ``key`` has not been registered.
        """
        key_index = self.index.get(key)
        if key_index is None:
            raise KeyError(f"{key=} not found.")
        self.counts[key_index] = value

    def update_counts(self, count_stream, increment=1, ambiguous_counts=False):
        """Add the alignments from ``count_stream`` to the count array.

        Args:
            count_stream: Iterable of ``(hits, aln_count)`` pairs. Only the
                first element of ``hits`` is used; it must provide ``rid``
                and ``rev_strand``.
            increment: Base increment per stream item.
            ambiguous_counts: Selects the column that is incremented
                (column 1 if true, column 0 otherwise).

        Returns:
            The sum of all increments that were applied.
        """
        contributed_counts = 0
        for hits, aln_count in count_stream:
            hit = hits[0]

            # A single alignment always contributes the full increment;
            # otherwise the distribution mode decides.
            if aln_count == 1:
                inc = increment
            else:
                inc = self.get_increment(aln_count, increment)

            # Strand-specific counting keeps the two strands of a reference
            # apart. The previous tuple-indexing expression selected these
            # two alternatives the wrong way round.
            key = (hit.rid, hit.rev_strand) if self.strand_specific else hit.rid

            key_index = self.index.get(key)
            if key_index is None:
                # All rows in use: append another INITIAL_SIZE zero rows.
                if len(self.index) == self.counts.shape[0]:
                    self.counts = np.pad(
                        self.counts,
                        ((0, AlignmentCounter.INITIAL_SIZE), (0, 0)),
                    )
                key_index = self.index[key] = len(self.index)

            self.counts[key_index, int(ambiguous_counts)] += inc
            contributed_counts += inc

        return contributed_counts
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,7 +3,7 @@ | |||||||||||||||||||||||||||||||||||||||||||||||||||||
| from collections import Counter | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| from .. import DistributionMode | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| from .alignment_counter import AlignmentCounter | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| from .alignment_counter2 import AlignmentCounter | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| from .region_counter import RegionCounter | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -50,32 +50,47 @@ def __init__( | |||||||||||||||||||||||||||||||||||||||||||||||||||||
| self.increments = [1.0, 1.0] | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| self.increments_auto_detect = [1.0, self.paired_end_count / 2.0] | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| self.uniq_seqcounts, self.ambig_seqcounts = None, None | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| self.uniq_regioncounts, self.ambig_regioncounts = None, None | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # self.uniq_seqcounts, self.ambig_seqcounts = None, None | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # self.uniq_regioncounts, self.ambig_regioncounts = None, None | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| self.seqcounts, self.regioncounts = None, None | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # self.uniq_seqcounts, self.ambig_seqcounts = None, None | |
| # self.uniq_regioncounts, self.ambig_regioncounts = None, None | |
| self.seqcounts, self.regioncounts = None, None | |
| """ | |
| Initialize counters that handle both unique and ambiguous counts. | |
| This consolidation improves maintainability and reduces code duplication. | |
| """ | |
| self.seqcounts, self.regioncounts = None, None |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fix missing attribute access in get_unannotated_reads.
The method is missing the .unannotated_reads attribute access which could lead to runtime errors.
if self.regioncounts is not None:
- unannotated_reads += self.regioncounts
+ unannotated_reads += self.regioncounts.unannotated_reads
if self.seqcounts is not None:
- unannotated_reads += self.seqcounts
+ unannotated_reads += self.seqcounts.unannotated_reads
📝 Committable suggestion
‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.
| if self.seqcounts is not None: | |
| unannotated_reads += self.seqcounts | |
| if self.seqcounts is not None: | |
| unannotated_reads += self.seqcounts.unannotated_reads |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handle potential None reference in get_all_regions.
The method should handle the case where the selected counter is None.
counts = (
self.seqcounts,
self.regioncounts,
)[region_counts]
+ if counts is None:
+ return
yield from counts
📝 Committable suggestion
‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.
| def get_all_regions(self, region_counts=False): | |
| # uniq_counts, ambig_counts = ( | |
| # (self.uniq_seqcounts, self.ambig_seqcounts,), | |
| # (self.uniq_regioncounts, self.ambig_regioncounts,), | |
| # )[region_counts] | |
| # yield from set(uniq_counts).union(ambig_counts) | |
| counts = ( | |
| self.seqcounts, | |
| self.regioncounts, | |
| )[region_counts] | |
| yield from counts | |
| def get_all_regions(self, region_counts=False): | |
| # uniq_counts, ambig_counts = ( | |
| # (self.uniq_seqcounts, self.ambig_seqcounts,), | |
| # (self.uniq_regioncounts, self.ambig_regioncounts,), | |
| # )[region_counts] | |
| # yield from set(uniq_counts).union(ambig_counts) | |
| counts = ( | |
| self.seqcounts, | |
| self.regioncounts, | |
| )[region_counts] | |
| if counts is None: | |
| return | |
| yield from counts |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Add input validation for feature_len and scaling_factor.
The method should validate inputs to prevent division by zero or negative values.
```diff
 @staticmethod
 def normalise_counts(counts, feature_len, scaling_factor):
     """Returns raw, length-normalised, and scaled feature counts."""
+    if feature_len <= 0:
+        raise ValueError("Feature length must be positive")
+    if scaling_factor < 0:
+        raise ValueError("Scaling factor cannot be negative")
     normalised = counts / feature_len
     scaled = normalised * scaling_factor
     return counts, normalised, scaled
```
📝 Committable suggestion