Skip to content

Commit 5c728ae

Browse files
committed
Fix bug with wrong method flag returned on failure of calculation of D and improved calculation of actual D when combinations (-c) are used.
1 parent ea67e26 commit 5c728ae

File tree

8 files changed

+112
-50
lines changed

8 files changed

+112
-50
lines changed

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,6 @@ target/
8181
profile_default/
8282
ipython_config.py
8383

84-
# pyenv
85-
.python-version
86-
8784
# pipenv
8885
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
8986
# However, in case of collaboration, if having platform-specific dependencies or dependencies

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.10.4

BirthdayProblem.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,13 @@ def facultyNTakeMLogE(n, nLogE, m):
171171
nTakeMFacLogE = _DecimalContext.ctx.subtract(nFacLogE, nSubMFacLogE)
172172
return nTakeMFacLogE
173173

174+
@staticmethod
175+
def facultyNaive(n):
176+
nFac = _DecimalFns.ONE
177+
for i in range(int(n),0, -1):
178+
nFac = _DecimalContext.ctx.multiply(nFac, Decimal(i))
179+
return nFac
180+
174181
# faculty method wrapper for both natural and base-2 logarithms
175182
@staticmethod
176183
def facultyLog(n, nLog, isLog2):
@@ -824,6 +831,12 @@ class _BirthdayProblemInputHandler:
824831
########################################################################################################################################################################################################
825832
########################################################################################################################################################################################################
826833

834+
# threshold for resulting log2 d size input under which we use the exact naive method for calculating inputs with
835+
# both -c and -b flags (for too large inputs we will get overflow when calculating d which is needed for the naive
836+
# method but d is not really needed to solve the problem in log 2 space so then we downgrade to Stirling's
837+
# approximation when processing the inputs instead). The used threshold implies naive calculation of 32768!
838+
LOG2_THRESHOLD_FOR_NAIVE_CALCULATION_OF_D_FOR_COMBINATIONS_AND_BINARY = Decimal('15') # corresponds to 32768
839+
827840
@staticmethod
828841
def illegalInputString(varName = None):
829842
return "Illegal input" if varName is None else "Illegal input for '" + varName + "'"
@@ -892,11 +905,20 @@ def setup(dOrDLog, nOrNLog, p, isBinary, isCombinations):
892905
if isCombinations:
893906
# d is the size of a set of items, calculate the number of permutations that is possible with it
894907
if isBinary:
895-
dLog = _DecimalFns.facultyLog(_DecimalContext.ctx.power(_DecimalFns.TWO, dOrDLog), dOrDLog, True)
896-
d = _DecimalContext.ctx.power(_DecimalFns.TWO, dLog)
908+
if _DecimalFns.isGreaterThan(dOrDLog, _BirthdayProblemInputHandler.LOG2_THRESHOLD_FOR_NAIVE_CALCULATION_OF_D_FOR_COMBINATIONS_AND_BINARY):
909+
# use approximation
910+
dLog = _DecimalFns.facultyLog(_DecimalContext.ctx.power(_DecimalFns.TWO, dOrDLog), dOrDLog, True)
911+
d = _DecimalContext.ctx.power(_DecimalFns.TWO, dLog)
912+
else:
913+
# use exact calculation
914+
d = _DecimalContext.ctx.power(_DecimalFns.TWO, dOrDLog)
915+
d = _DecimalFns.facultyNaive(d)
916+
dLog = _DecimalContext.ctx.divide(_DecimalContext.ctx.ln(d), _DecimalFns.LOG_E_2)
897917
else:
898-
dLog = _DecimalFns.facultyLog(d, _DecimalContext.ctx.ln(dOrDLog), False)
899-
d = _DecimalContext.ctx.exp(dLog)
918+
# here we always need to display d in the output so if we can't calculate it, the request will fail,
919+
# therefore we can just calculate it in a naive way without log space
920+
d = _DecimalFns.facultyNaive(dOrDLog)
921+
dLog = _DecimalContext.ctx.ln(d)
900922
else:
901923
# d is already the size of the set of combinations
902924
if isBinary:
@@ -1107,7 +1129,7 @@ def solveJson(d, dLog, n, nLog, p, pPercent, isBinary, isStirling, isTaylor, isE
11071129
(n, methodUsed) = _BirthdayProblemSolverChecked.birthdayProblemInv(d, dLog, p, method, isBinary)
11081130
lastMethodUsed = methodUsed
11091131
except BaseException as e:
1110-
methodKey = _BirthdayProblemTextFormatter.methodToText(_BirthdayProblemSolver.CalcPrecision.TAYLOR_APPROX).lower()
1132+
methodKey = _BirthdayProblemTextFormatter.methodToText(method).lower()
11111133
errorMessage = str(e).lower()
11121134
if isinstance(e, KeyboardInterrupt):
11131135
res['results'][methodKey] = { 'error': 'interrupted' }

DEVELOPERS_NOTES.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,23 @@
11

2+
## Good to know about `Decimal` and calculations
3+
24
* `Decimal`s are immutable, but in some places an input is wrapped in `Decimal(x)`. This is likely because this input
35
can sometimes be a regular number, or was one historically and the `Decimal(x)` wrapping has been left in place.
46
* Adjusting `Decimal`s via `adjustPrecisions` is an attempt to allow a certain number of decimals to the right of
57
the comma so that, depending on the integer part, a `Decimal` can have its precision increased or decreased at
68
different times after some processing has been done. If this results in a too big number, then the precision needed is
79
too big and we can't carry out the calculations. This limit is set at 1000 digits (out of which 100 at most are to the
810
right of the comma). Larger numbers than this will result in the calculations failing.
11+
* It takes longer to calculate a loop where a log operation occurs in every iteration rather than a multiplication,
12+
therefore calculating stuff in log space can be more time-consuming but has the advantage of allowing larger numbers
13+
without overflowing.
14+
15+
## How to release
16+
17+
* Create a new branch with the name `X.X.X-feature`
18+
* Commit and push to git
19+
* Merge in git
20+
* Pull master locally
21+
* Add tag with `git tag X.X.X`
22+
* Push tags with `git push tags`
23+
* Add new release for tag in Github

DataTest.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -79,29 +79,29 @@
7979
'52 -p 0.1 -c -t',
8080
True,
8181
{
82-
'd': '≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67)',
82+
'd': '80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67)',
8383
'p': '10%',
8484
'results': {
85-
'taylor': {'result': '4119363813276486714957808853108064 (≈4*10^33)'}
85+
'taylor': {'result': '4122665867622533660736208120290868 (≈4*10^33)'}
8686
}
8787
}
8888
],
8989
[
9090
'52 -p 0.5 -c -t',
9191
True,
9292
{
93-
'd': '≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67)',
93+
'd': '80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67)',
9494
'p': '50%',
9595
'results': {
96-
'taylor': {'result': '10565837726592754214318243269428637 (≈10^34)'}
96+
'taylor': {'result': '10574307231100289363611308602026252 (≈10^34)'}
9797
}
9898
}
9999
],
100100
[
101101
'52 -n 10000000000000000000 -c -s -t',
102102
True,
103103
{
104-
'd': '≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67)',
104+
'd': '80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67)',
105105
'n': '10000000000000000000 (=10^19)',
106106
'results': {
107107
'stirling': {'result': '≈0% (≈6*10^-31)'},
@@ -113,37 +113,37 @@
113113
'52 -n 10000000000000000000000000000000000 -c -s -t',
114114
True,
115115
{
116-
'd': '≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67)',
116+
'd': '80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67)',
117117
'n': '10000000000000000000000000000000000 (=10^34)',
118118
'results': {
119-
'stirling': {'result': '≈46.2536366051%'},
120-
'taylor': {'result': '≈46.2536366051%'}
119+
'stirling': {'result': '≈46.2001746672%'},
120+
'taylor': {'result': '≈46.2001746672%'}
121121
}
122122
}
123123
],
124124
[
125125
'4 -n 18 -b -c -a',
126126
True,
127127
{
128-
'd': '≈2^44.2426274105',
128+
'd': '≈2^44.2501404699',
129129
'n': '2^18',
130130
'results': {
131-
'exact': {'result': '≈0.1649423866% (≈2*10^-3)'},
132-
'stirling': {'result': '≈0.1649422224% (≈2*10^-3)'},
133-
'taylor': {'result': '≈0.1649428504% (≈2*10^-3)'}
131+
'exact': {'result': '≈0.1640861961% (≈2*10^-3)'},
132+
'stirling': {'result': '≈0.1640861961% (≈2*10^-3)'},
133+
'taylor': {'result': '≈0.1640868208% (≈2*10^-3)'}
134134
}
135135
}
136136
],
137137
[
138138
'16 -n 262144 -c -a',
139139
True,
140140
{
141-
'd': '≈20814114415223 (≈2*10^13)',
141+
'd': '20922789888000 (≈2*10^13)',
142142
'n': '262144 (≈3*10^5)',
143143
'results': {
144-
'exact': {'result': '≈0.1649423866% (≈2*10^-3)'},
145-
'stirling': {'result': '≈0.1649422224% (≈2*10^-3)'},
146-
'taylor': {'result': '≈0.1649428504% (≈2*10^-3)'}
144+
'exact': {'result': '≈0.1640861961% (≈2*10^-3)'},
145+
'stirling': {'result': '≈0.1640861961% (≈2*10^-3)'},
146+
'taylor': {'result': '≈0.1640868208% (≈2*10^-3)'}
147147
}
148148
}
149149
],
@@ -184,6 +184,17 @@
184184
}
185185
}
186186
],
187+
[
188+
'1280 -p 0.5 -b -c -e',
189+
True,
190+
{
191+
'd': '≈2^26614275474014559821953787196100807012412948367028783328633986189111799719299525295290069853854877867120534538070982737886888824825850066183609939356930416666755910887266773840385877776851876084664629106697034459995685244418266399190317043076208186461319737435225525519543453247219560088300601118286958869004726993677805799134087110255288245085785541666888810491274634074724367056992419344.3330052449',
192+
'p': '50%',
193+
'results': {
194+
'exact': {'error': 'd exceeds maximum size and is needed for method'}
195+
}
196+
}
197+
],
187198
[
188199
'12800 -n 6400 -b -c -s -t',
189200
False,

LibraryTest.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,12 @@ def testFn(args):
244244
[
245245
{ "dOrDLog": "52", "p": "0.1", "method": BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX, "isCombinations": True, "isBinary": False },
246246
True,
247-
(Decimal("4119363813276486714957808853108064"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
247+
(Decimal("4122665867622533660736208120290868"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
248248
],
249249
[
250250
{ "dOrDLog": "52", "p": "0.5", "method": BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX, "isCombinations": True, "isBinary": False },
251251
True,
252-
(Decimal("10565837726592754214318243269428637"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
252+
(Decimal("10574307231100289363611308602026252"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
253253
],
254254
[
255255
{ "dOrDLog": "52", "nOrNLog": "10000000000000000000", "method": BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX, "isCombinations": True, "isBinary": False },
@@ -264,42 +264,42 @@ def testFn(args):
264264
[
265265
{ "dOrDLog": "52", "nOrNLog": "10000000000000000000000000000000000", "method": BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX, "isCombinations": True, "isBinary": False },
266266
True,
267-
(Decimal("0.462536366051"), BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX)
267+
(Decimal("0.462001746672"), BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX)
268268
],
269269
[
270270
{ "dOrDLog": "52", "nOrNLog": "10000000000000000000000000000000000", "method": BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX, "isCombinations": True, "isBinary": False },
271271
True,
272-
(Decimal("0.462536366051"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
272+
(Decimal("0.462001746672"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
273273
],
274274
[
275275
{ "dOrDLog": "4", "nOrNLog": "18", "method": BirthdayProblem.Solver.CalcPrecision.EXACT, "isCombinations": True, "isBinary": True },
276276
True,
277-
(Decimal("0.001649423866"), BirthdayProblem.Solver.CalcPrecision.EXACT)
277+
(Decimal("0.001640861961"), BirthdayProblem.Solver.CalcPrecision.EXACT)
278278
],
279279
[
280280
{ "dOrDLog": "4", "nOrNLog": "18", "method": BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX, "isCombinations": True, "isBinary": True },
281281
True,
282-
(Decimal("0.001649422224"), BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX)
282+
(Decimal("0.001640861961"), BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX)
283283
],
284284
[
285285
{ "dOrDLog": "4", "nOrNLog": "18", "method": BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX, "isCombinations": True, "isBinary": True },
286286
True,
287-
(Decimal("0.001649428504"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
287+
(Decimal("0.001640868208"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
288288
],
289289
[
290290
{ "dOrDLog": "16", "nOrNLog": "262144", "method": BirthdayProblem.Solver.CalcPrecision.EXACT, "isCombinations": True, "isBinary": False },
291291
True,
292-
(Decimal("0.001649423866"), BirthdayProblem.Solver.CalcPrecision.EXACT)
292+
(Decimal("0.001640861961"), BirthdayProblem.Solver.CalcPrecision.EXACT)
293293
],
294294
[
295295
{ "dOrDLog": "16", "nOrNLog": "262144", "method": BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX, "isCombinations": True, "isBinary": False },
296296
True,
297-
(Decimal("0.001649422224"), BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX)
297+
(Decimal("0.001640861961"), BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX)
298298
],
299299
[
300300
{ "dOrDLog": "16", "nOrNLog": "262144", "method": BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX, "isCombinations": True, "isBinary": False },
301301
True,
302-
(Decimal("0.001649428504"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
302+
(Decimal("0.001640868208"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
303303
],
304304
[
305305
{ "dOrDLog": "20922789888000", "nOrNLog": "262144", "method": BirthdayProblem.Solver.CalcPrecision.EXACT, "isCombinations": False, "isBinary": False },
@@ -336,6 +336,11 @@ def testFn(args):
336336
True,
337337
(Decimal("0"), BirthdayProblem.Solver.CalcPrecision.TAYLOR_APPROX)
338338
],
339+
[
340+
{ "dOrDLog": "1280", "p": "0.5", "method": BirthdayProblem.Solver.CalcPrecision.EXACT, "isCombinations": True, "isBinary": True },
341+
False,
342+
"d exceeds maximum size and is needed for method"
343+
],
339344
[
340345
{ "dOrDLog": "12800", "nOrNLog": "6400", "method": BirthdayProblem.Solver.CalcPrecision.STIRLING_APPROX, "isCombinations": True, "isBinary": True },
341346
False,

OutputTest.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -148,44 +148,44 @@
148148
],
149149
['52 -p 0.1 -c -t', True,
150150
[
151-
'The number of samples, sampled uniformly at random from a set of ≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67) items, needed to have at least a 10% chance of a non-unique sample is:',
152-
' 4119363813276486714957808853108064 (≈4*10^33) (Taylor series approximation used in main calculation)'
151+
'The number of samples, sampled uniformly at random from a set of 80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67) items, needed to have at least a 10% chance of a non-unique sample is:',
152+
' 4122665867622533660736208120290868 (≈4*10^33) (Taylor series approximation used in main calculation)'
153153
]
154154
],
155155
['52 -p 0.5 -c -t', True,
156156
[
157-
'The number of samples, sampled uniformly at random from a set of ≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67) items, needed to have at least a 50% chance of a non-unique sample is:',
158-
' 10565837726592754214318243269428637 (≈10^34) (Taylor series approximation used in main calculation)'
157+
'The number of samples, sampled uniformly at random from a set of 80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67) items, needed to have at least a 50% chance of a non-unique sample is:',
158+
' 10574307231100289363611308602026252 (≈10^34) (Taylor series approximation used in main calculation)'
159159
]
160160
],
161161
['52 -n 10000000000000000000 -c -s -t', True,
162162
[
163-
'The probability of finding at least one non-unique sample among 10000000000000000000 (=10^19) samples, sampled uniformly at random from a set of ≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67) items, is:',
163+
'The probability of finding at least one non-unique sample among 10000000000000000000 (=10^19) samples, sampled uniformly at random from a set of 80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67) items, is:',
164164
" ≈0% (≈6*10^-31) (Stirling's approximation used in factorial calculation)",
165165
' ≈0% (≈6*10^-31) (Taylor series approximation used in main calculation (removes need for factorial calculation))'
166166
]
167167
],
168168
['52 -n 10000000000000000000000000000000000 -c -s -t', True,
169169
[
170-
'The probability of finding at least one non-unique sample among 10000000000000000000000000000000000 (=10^34) samples, sampled uniformly at random from a set of ≈80529020383886612857810199580012764961409004334781435987268084328737 (≈8*10^67) items, is:',
171-
" ≈46.2536366051% (Stirling's approximation used in factorial calculation)",
172-
' ≈46.2536366051% (Taylor series approximation used in main calculation (removes need for factorial calculation))'
170+
'The probability of finding at least one non-unique sample among 10000000000000000000000000000000000 (=10^34) samples, sampled uniformly at random from a set of 80658175170943878571660636856403766975289505440883277824000000000000 (≈8*10^67) items, is:',
171+
" ≈46.2001746672% (Stirling's approximation used in factorial calculation)",
172+
' ≈46.2001746672% (Taylor series approximation used in main calculation (removes need for factorial calculation))'
173173
]
174174
],
175175
['4 -n 18 -b -c -a', True,
176176
[
177-
'The probability of finding at least one non-unique sample among 2^18 samples, sampled uniformly at random from a set of ≈2^44.2426274105 items, is:',
178-
' ≈0.1649423866% (≈2*10^-3) (Exact method)',
179-
" ≈0.1649422224% (≈2*10^-3) (Stirling's approximation used in factorial calculation)",
180-
' ≈0.1649428504% (≈2*10^-3) (Taylor series approximation used in main calculation (removes need for factorial calculation))'
177+
'The probability of finding at least one non-unique sample among 2^18 samples, sampled uniformly at random from a set of ≈2^44.2501404699 items, is:',
178+
' ≈0.1640861961% (≈2*10^-3) (Exact method)',
179+
" ≈0.1640861961% (≈2*10^-3) (Stirling's approximation used in factorial calculation)",
180+
' ≈0.1640868208% (≈2*10^-3) (Taylor series approximation used in main calculation (removes need for factorial calculation))'
181181
]
182182
],
183183
['16 -n 262144 -c -a', True,
184184
[
185-
'The probability of finding at least one non-unique sample among 262144 (≈3*10^5) samples, sampled uniformly at random from a set of ≈20814114415223 (≈2*10^13) items, is:',
186-
' ≈0.1649423866% (≈2*10^-3) (Exact method)',
187-
" ≈0.1649422224% (≈2*10^-3) (Stirling's approximation used in factorial calculation)",
188-
' ≈0.1649428504% (≈2*10^-3) (Taylor series approximation used in main calculation (removes need for factorial calculation))'
185+
'The probability of finding at least one non-unique sample among 262144 (≈3*10^5) samples, sampled uniformly at random from a set of 20922789888000 (≈2*10^13) items, is:',
186+
' ≈0.1640861961% (≈2*10^-3) (Exact method)',
187+
" ≈0.1640861961% (≈2*10^-3) (Stirling's approximation used in factorial calculation)",
188+
' ≈0.1640868208% (≈2*10^-3) (Taylor series approximation used in main calculation (removes need for factorial calculation))'
189189
]
190190
],
191191
['20922789888000 -n 262144 -a', True,
@@ -210,6 +210,12 @@
210210
' 0% (Taylor series approximation used in main calculation (removes need for factorial calculation))'
211211
]
212212
],
213+
['1280 -p 0.5 -b -c -e', True,
214+
[
215+
'The number of samples, sampled uniformly at random from a set of ≈2^26614275474014559821953787196100807012412948367028783328633986189111799719299525295290069853854877867120534538070982737886888824825850066183609939356930416666755910887266773840385877776851876084664629106697034459995685244418266399190317043076208186461319737435225525519543453247219560088300601118286958869004726993677805799134087110255288245085785541666888810491274634074724367056992419344.3330052449 items, needed to have at least a 50% chance of a non-unique sample is:',
216+
" N/A (Calculation failed: d exceeds maximum size and is needed for method (Exact method))"
217+
]
218+
],
213219
['12800 -n 6400 -b -c -s -t', False, "dLog exceeds maximum size and is needed to initialize calculations"]
214220
]
215221

0 commit comments

Comments
 (0)