From ff7f10ad03f4836a277198f1c485aececc677298 Mon Sep 17 00:00:00 2001 From: Rob Ennis Date: Sat, 4 Nov 2017 14:32:23 +0000 Subject: [PATCH 1/2] strassen algorithm using python and numba --- .../notebooks/Strassen Algorithm.ipynb | 311 ++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 performance/2017_07_strassens_algorithm/notebooks/Strassen Algorithm.ipynb diff --git a/performance/2017_07_strassens_algorithm/notebooks/Strassen Algorithm.ipynb b/performance/2017_07_strassens_algorithm/notebooks/Strassen Algorithm.ipynb new file mode 100644 index 0000000..1a63ace --- /dev/null +++ b/performance/2017_07_strassens_algorithm/notebooks/Strassen Algorithm.ipynb @@ -0,0 +1,311 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Strassen matrix multiplication algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "from numba import njit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "n = 1024\n", + "A = np.random.randn(n, n)\n", + "B = np.random.randn(n, n)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 21.3 ms per loop\n" + ] + } + ], + "source": [ + "%timeit A @ B" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Naive IKJ multiplication" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "@njit\n", + "def matrix_multiply(A, B):\n", + " \n", + " n = A.shape[0]\n", + " C = np.zeros_like(A)\n", + "\n", + " for i in range(n):\n", + " for k in range(n): \n", + " for j in range(n):\n", + " C[i][j] += A[i][k] * B[k][j]\n", + " return C" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 1.05 s per loop\n" + ] + } + ], + "source": [ + "%timeit matrix_multiply(A, B)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.allclose(A @ B, matrix_multiply(A, B))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "### Naive strassen" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "@njit\n", + "def matrix_merge(A, B, C, D):\n", + "\n", + " n = A.shape[0]\n", + " result = np.empty(shape=(2*n, 2*n))\n", + "\n", + " for i in range(n):\n", + " for j in range(n):\n", + " result[i][j] = A[i][j]\n", + " result[i][j + n] = B[i][j]\n", + " result[i + n][j] = C[i][j]\n", + " result[i + n][j + n] = D[i][j]\n", + "\n", + " return result\n", + "\n", + "\n", + "@njit(parallel=True)\n", + "def matrix_split(M):\n", + " \n", + " n = M.shape[0] // 2\n", + " \n", + " A = np.empty(shape=(n, n))\n", + " B = np.empty_like(A)\n", + " C = np.empty_like(A)\n", + " D = np.empty_like(A)\n", + " \n", + " for i in range(n): \n", + " for j in range(n):\n", + " A[i][j] = M[i][j]\n", + " B[i][j] = M[i][j + n]\n", + " C[i][j] = M[i + n][j]\n", + " D[i][j] = M[i + n][j + n]\n", + "\n", + " return A, B, C, D\n", + "\n", + "@njit\n", + "def matrix_add(A, B):\n", + "\n", + " n = A.shape[0]\n", + " \n", + " C = np.empty(shape=(n, n))\n", + " for i in range(n):\n", + " for j in range(n):\n", + " C[i][j] = A[i][j] + B[i][j]\n", + " \n", + " return C\n", + "\n", + "\n", + "@njit\n", + "def matrix_subtract(A, B):\n", + "\n", + " n = A.shape[0]\n", + " \n", + " C = np.empty(shape=(n, n))\n", + " for i in range(n):\n", + " for j in range(n):\n", + " C[i][j] = A[i][j] - B[i][j]\n", + " \n", + " return C\n", + "\n", + "\n", + "@njit\n", + "def strassen_naive(A, B):\n", + " \n", + " m = A.shape[0]\n", + "\n", + " A11, A12, A21, A22 = matrix_split(A)\n", + " B11, B12, B21, B22 = matrix_split(B)\n", + " S1 = matrix_subtract(B12, B22)\n", + " S2 = matrix_add(A11, A12)\n", + " S3 = matrix_add(A21, A22)\n", + " S4 = matrix_subtract(B21, B11)\n", + " S5 = matrix_add(A11, A22)\n", + " S6 = matrix_add(B11,B22)\n", + " S7 = matrix_subtract(A12, A22)\n", + " S8 = matrix_add(B21, B22)\n", + " S9 = matrix_subtract(A11, A21)\n", + " S10 = matrix_add(B11, B12)\n", + "\n", + " cutoff = 256\n", + "\n", + " if m > cutoff:\n", + " P1 = strassen_naive(A11, S1)\n", + " P2 = strassen_naive(S2, B22) \n", + " P3 = strassen_naive(S3, B11) \n", + " P4 = strassen_naive(A22, S4) \n", + " P5 = strassen_naive(S5, S6) \n", + " P6 = strassen_naive(S7, S8) \n", + " P7 = strassen_naive(S9, S10) \n", + " else:\n", + " P1 = matrix_multiply(A11, S1)\n", + " P2 = matrix_multiply(S2, B22)\n", + " P3 = matrix_multiply(S3, B11)\n", + " P4 = matrix_multiply(A22, S4)\n", + " P5 = matrix_multiply(S5, S6)\n", + " P6 = matrix_multiply(S7, S8)\n", + " P7 = matrix_multiply(S9, S10)\n", + "\n", + " C11 = matrix_add(matrix_add(P5, P6), matrix_subtract(P4, P2))\n", + " C12 = matrix_add(P1, P2)\n", + " C21 = matrix_add(P3, P4)\n", + " C22 = matrix_subtract(matrix_add(P5, P1), matrix_add(P3, P7))\n", + "\n", + " return matrix_merge(C11, C12, C21, C22)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 225 ms per loop\n" + ] + } + ], + "source": [ + "%timeit strassen_naive(A, B)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.allclose(A @ B, strassen_naive(A, B))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 705fcfa4f3b645d07301487280e2f2305837867b Mon Sep 17 00:00:00 2001 From: rjenc29 Date: Sun, 10 Feb 2019 11:17:38 +0000 Subject: [PATCH 2/2] initial commit - py triples --- .../2019_02_pythagorean_triples/__init__.py | 0 .../more_triples.py | 56 +++++++++++++++++++ .../test_more_triples.py | 33 +++++++++++ .../2019_02_pythagorean_triples/triples.py | 1 - 4 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 numerical/2019_02_pythagorean_triples/__init__.py create mode 100644 numerical/2019_02_pythagorean_triples/more_triples.py create mode 100644 numerical/2019_02_pythagorean_triples/test_more_triples.py diff --git a/numerical/2019_02_pythagorean_triples/__init__.py b/numerical/2019_02_pythagorean_triples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/numerical/2019_02_pythagorean_triples/more_triples.py b/numerical/2019_02_pythagorean_triples/more_triples.py new file mode 100644 index 0000000..cccab30 --- /dev/null +++ b/numerical/2019_02_pythagorean_triples/more_triples.py @@ -0,0 +1,56 @@ + +# ref: +# https://en.wikipedia.org/wiki/Formulas_for_generating_Pythagorean_triples + +from itertools import islice +from heapq import merge + +from numba import njit + + +def take(n, iterable): + return list(islice(iterable, n)) + + +@njit +def py_triples_stifel(): + n = 1 + + while True: + denom = n * 2 + 1 + improper_numerator = n * (denom + 1) + + yield denom, improper_numerator, improper_numerator + 1 + + n += 1 + + +@njit +def py_triples_ozanam(): + n = 1 + + while True: + denom = 4 * (n + 1) + improper_numerator = denom * (1 + n) - 1 + + yield denom, improper_numerator, improper_numerator + 2 + + n += 1 + + +def py_triples_stifel_ozanam(): + # all primitive triples of the Plato and Pythagoras families + return merge(py_triples_stifel(), py_triples_ozanam()) + + +@njit +def py_triples_fibonacci(): + k = 3 + + while True: + c_2 = k ** 2 + n = (c_2 + 1) >> 1 + + yield k, (n - 1), n + + k += 2 diff --git a/numerical/2019_02_pythagorean_triples/test_more_triples.py b/numerical/2019_02_pythagorean_triples/test_more_triples.py new file mode 100644 index 0000000..643228d --- /dev/null +++ b/numerical/2019_02_pythagorean_triples/test_more_triples.py @@ -0,0 +1,33 @@ +from pytest import mark + +from more_triples import ( + py_triples_stifel, py_triples_ozanam, + py_triples_fibonacci, take, + py_triples_stifel_ozanam +) + +ALL_FNS = [ + py_triples_stifel, py_triples_ozanam, + py_triples_fibonacci +] + + +def check(gen, first_n=100): + for a, b, c in take(first_n, gen): + assert a**2 + b**2 == c**2 + + +@mark.parametrize('fn', ALL_FNS, ids=lambda x: f'{x.__name__}') +def test_py_triple(fn): + gen = fn() + check(gen) + + +def test_py_triples_fused_stifel_ozanam(): + gen = py_triples_stifel_ozanam() + check(gen) + + +if __name__ == '__main__': + import pytest + pytest.main([__file__]) diff --git a/numerical/2019_02_pythagorean_triples/triples.py b/numerical/2019_02_pythagorean_triples/triples.py index 397ef40..778ff41 100644 --- a/numerical/2019_02_pythagorean_triples/triples.py +++ b/numerical/2019_02_pythagorean_triples/triples.py @@ -17,7 +17,6 @@ def fibonacci_triples(): odd_ints.append(odd) if int(odd**0.5)**2 == int(odd) and len(odd_ints) >= 4: - n = (odd + 1) / 2 b2 = sum(odd_ints[:-1]) c2 = sum(odd_ints) yield tuple(map(int, (sqrt(odd), sqrt(b2), sqrt(c2))))