diff --git a/.travis.yml b/.travis.yml index beda761..8eabda2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,10 +30,11 @@ install: script: - pip install runtests - - mpirun -n 1 python $PWD/tests/roundtrip.py -v -Nproc 1 1 -Nmesh 13 15 16 - - mpirun -n 2 python $PWD/tests/roundtrip.py -v -Nproc 2 1 -Nproc 1 2 -Nmesh 13 15 16 - - mpirun -n 4 python $PWD/tests/roundtrip.py -v -Nproc 2 2 -Nmesh 13 15 16 - python ./runtests.py --mpirun="mpirun -np 4" + - mpirun -n 1 python $PWD/scripts/pfft-roundtrip-matrix.py -v -diag -Nproc 1 1 -Nmesh 13 15 16 + - mpirun -n 2 python $PWD/scripts/pfft-roundtrip-matrix.py -v -diag -Nproc 2 1 -Nproc 1 2 -Nmesh 13 15 16 + - mpirun -n 4 python $PWD/scripts/pfft-roundtrip-matrix.py -v -diag -Nproc 2 2 -Nmesh 13 15 16 + - mpirun -n 4 python $PWD/scripts/pfft-roundtrip-matrix.py -v -diag -Nproc 2 2 -Nmesh 13 15 - bash check_tag.sh pfft/version.py deploy: diff --git a/depends/install_pfft.sh b/depends/install_pfft.sh index 23efd07..f0d6bce 100644 --- a/depends/install_pfft.sh +++ b/depends/install_pfft.sh @@ -7,7 +7,7 @@ OPTIMIZE1=`echo "$*" | sed 's;enable-sse2;enable-sse;'` echo "Optimization for double" ${OPTIMIZE} echo "Optimization for single" ${OPTIMIZE1} -PFFT_VERSION=1.0.8-alpha3-fftw3 +PFFT_VERSION=1.0.8-alpha3-fftw3-2don2d TMP="tmp-pfft-$PFFT_VERSION" LOGFILE="build.log" diff --git a/pfft/core.pyx b/pfft/core.pyx index 21fb9f8..1b3e433 100644 --- a/pfft/core.pyx +++ b/pfft/core.pyx @@ -187,12 +187,42 @@ class Flags(int): PFFT_BUFFERED_INPLACE = _PFFT_BUFFERED_INPLACE PFFT_PADDED_R2C = _PFFT_PADDED_R2C PFFT_PADDED_C2R = _PFFT_PADDED_C2R + TRANSPOSED_NONE = _PFFT_TRANSPOSED_NONE + TRANSPOSED_IN = _PFFT_TRANSPOSED_IN + TRANSPOSED_OUT = _PFFT_TRANSPOSED_OUT + SHIFTED_NONE = _PFFT_SHIFTED_NONE + SHIFTED_IN = _PFFT_SHIFTED_IN + SHIFTED_OUT = _PFFT_SHIFTED_OUT + MEASURE = _PFFT_MEASURE + ESTIMATE = _PFFT_ESTIMATE + PATIENT = _PFFT_PATIENT + EXHAUSTIVE = _PFFT_EXHAUSTIVE + NO_TUNE = _PFFT_NO_TUNE + TUNE = _PFFT_TUNE + PRESERVE_INPUT = _PFFT_PRESERVE_INPUT + DESTROY_INPUT = _PFFT_DESTROY_INPUT + BUFFERED_INPLACE = _PFFT_BUFFERED_INPLACE + PADDED_R2C = _PFFT_PADDED_R2C + PADDED_C2R = _PFFT_PADDED_C2R def __new__(cls, value): self = int.__new__(cls, value) return self def __repr__(self): d = self.__class__.__dict__ - return '|'.join([k for k in d.keys() if k.startswith('PFFT') and (d[k] & self)]) + keys = sorted([k for k in d.keys() if k.isupper() and not k.startswith('PFFT')]) + return '|'.join([k for k in keys if (d[k] & self)]) + + def format(self, flags=None): + d = self.__class__.__dict__ + keys = sorted([k for k in d.keys() if k.isupper() and not k.startswith('PFFT')]) + s = [] + for key in keys: + if flags is not None and not (d[key] & flags): continue + if d[key] & self: + s.append(key) + else: + s.append(" " * len(key)) + return ' '.join(s) class Direction(int): """ @@ -200,12 +230,15 @@ class Direction(int): """ PFFT_FORWARD = _PFFT_FORWARD PFFT_BACKWARD = _PFFT_BACKWARD + FORWARD = _PFFT_FORWARD + BACKWARD = _PFFT_BACKWARD def __new__(cls, value): self = int.__new__(cls, value) return self def __repr__(self): d = self.__class__.__dict__ - return 'and'.join([k for k in d.keys() if k.startswith('PFFT') and (d[k] == self)]) + keys = sorted([k for k in d.keys() if k.isupper() and not k.startswith('PFFT')]) + return 'and'.join([k for k in keys if (d[k] == self)]) ###### # define Type as the transform type @@ -225,12 +258,21 @@ class Type(int): PFFTF_R2C = 5 PFFTF_C2R = 6 PFFTF_R2R = 7 + C2C = 0 + R2C = 1 + C2R = 2 + R2R = 3 + C2CF = 4 + R2CF = 5 + C2RF = 6 + R2RF = 7 def __new__(cls, value): self = int.__new__(cls, value) return self def __repr__(self): d = self.__class__.__dict__ - return 'and'.join([k for k in d.keys() if k.startswith('PFFT') and (d[k] == self)]) + keys = sorted([k for k in d.keys() if k.isupper() and not k.startswith('PFFT')]) + return 'and'.join([k for k in keys if (d[k] == self)]) ctypedef numpy.intp_t (*pfft_local_size_func)(int rnk_n, numpy.intp_t * n, cMPI.MPI_Comm comm, int pfft_flags, numpy.intp_t * local_ni, numpy.intp_t * local_i_start, @@ -484,14 +526,20 @@ cdef class Partition(object): local_ni, local_no, local_i_start, local_o_start = numpy.empty((4, n_.shape[0]), 'intp') + self.type = Type(type) + self.flags = Flags(flags) + if len(n_) < len(procmesh.np): raise ValueError("ProcMesh (%d) shall have less dimentions than Mesh (%d)" % (len(procmesh.np), len(n_))) - if len(n_) == len(procmesh.np): # https://github.com/mpip/pfft/issues/29 - raise NotImplementedError("Currently using the same ProcMesh (%d) dimentions with Mesh (%d) is not supported." % (len(procmesh.np), len(n_))) + if len(n_) == len(procmesh.np): + if len(n_) != 2 and len(n_) != 3: # https://github.com/mpip/pfft/issues/29 + raise NotImplementedError("Currently using the same ProcMesh (%d) dimentions with Mesh (%d) is not supported other than 2don2d or 3don3d" % (len(procmesh.np), len(n_))) + if (self.flags & Flags.PFFT_PADDED_R2C) | (self.flags & Flags.PFFT_PADDED_C2R): + if self.type in (Type.R2C, Type.C2R, Type.R2CF, Type.C2RF): + # https://github.com/mpip/pfft/pull/31 + raise NotImplementedError("Currently using the same ProcMesh (%d) dimentions with Mesh (%d) is not supported on padded transforms." % (len(procmesh.np), len(n_))) - self.type = Type(type) - self.flags = Flags(flags) cdef pfft_local_size_func func = PFFT_LOCAL_SIZE_FUNC[self.type] @@ -774,7 +822,9 @@ cdef class Plan(object): inplace = False if inplace != self.inplace: raise ValueError("inplace status mismatch with the plan") + func(self.plan, i.ptr, o.ptr) + def __repr__(self): return "Plan(" + \ ','.join([ diff --git a/pfft/tests/test_pfft.py b/pfft/tests/test_pfft.py index acb583e..5d5f632 100644 --- a/pfft/tests/test_pfft.py +++ b/pfft/tests/test_pfft.py @@ -196,3 +196,65 @@ def test_leak(comm): buffer = pfft.LocalBuffer(partition) #FIXME: check with @mpip if this is correct. i = buffer.view_input() + +@MPITest([4]) +def test_2d_on_2d_c2c(comm): + procmesh = pfft.ProcMesh(np=[2, 2], comm=comm) + N = (8, 8) + + data = numpy.arange(numpy.prod(N), dtype='complex128').reshape(N) + + correct = numpy.fft.fftn(data.copy()) + result = numpy.zeros_like(correct) + + partition = pfft.Partition(pfft.Type.PFFT_C2C, N, + procmesh, flags=pfft.Flags.PFFT_ESTIMATE + | pfft.Flags.PFFT_TRANSPOSED_OUT +# | pfft.Flags.PFFT_DESTROY_INPUT + | pfft.Flags.PFFT_PRESERVE_INPUT + ) + + buffer1 = pfft.LocalBuffer(partition) + buffer2 = pfft.LocalBuffer(partition) + + plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer1, buffer2) + + buffer1.view_input()[:] = data[partition.local_i_slice] + plan.execute(buffer1, buffer2) + + result[partition.local_o_slice] = buffer2.view_output() + result = comm.allreduce(result) + assert_almost_equal(correct, result) + +@MPITest([1, 4]) +def test_2d_on_2d_r2c(comm): + if comm.size == 1: + procmesh = pfft.ProcMesh(np=[1, 1], comm=comm) + else: + procmesh = pfft.ProcMesh(np=[2, 2], comm=comm) + N = (8, 8) + + data = numpy.arange(numpy.prod(N), dtype='f8').reshape(N) + + correct = numpy.fft.rfftn(data.copy()) + result = numpy.zeros_like(correct) + + partition = pfft.Partition(pfft.Type.PFFT_R2C, N, + procmesh, flags=pfft.Flags.PFFT_ESTIMATE + | pfft.Flags.PFFT_TRANSPOSED_OUT +# | pfft.Flags.PFFT_DESTROY_INPUT + | pfft.Flags.PFFT_PRESERVE_INPUT +# | pfft.Flags.PADDED_R2C # doesn't work yet + ) + + buffer1 = pfft.LocalBuffer(partition) + buffer2 = pfft.LocalBuffer(partition) + + plan = pfft.Plan(partition, pfft.Direction.PFFT_FORWARD, buffer1, buffer2) + + buffer1.view_input()[:] = data[partition.local_i_slice] + plan.execute(buffer1, buffer2) + + result[partition.local_o_slice] = buffer2.view_output() + result = comm.allreduce(result) + assert_almost_equal(correct, result) diff --git a/tests/roundtrip.py b/scripts/pfft-roundtrip-matrix.py similarity index 57% rename from tests/roundtrip.py rename to scripts/pfft-roundtrip-matrix.py index aa2e44c..964926c 100644 --- a/tests/roundtrip.py +++ b/scripts/pfft-roundtrip-matrix.py @@ -11,16 +11,22 @@ inplace transform Examples: + + * to run in source code, first get a shell with + python runtests.py --shell + * for single-rank numpy agreement test, run with - mpirun -np 1 python roundtrip.py -Nmesh 32 32 32 -Nmesh 3 3 3 -tree -verbose + mpirun -np 1 python roundtrip.py -Nmesh 32 32 32 -Nmesh 3 3 3 -verbose * for multi-rank tests, run with - mpirun -np n python roundtrip.py -Nmesh 32 32 32 -Nmesh 3 3 3 -tree -verbose + mpirun -np 4 python roundtrip.py -Nmesh 32 32 32 -Nmesh 3 3 3 --verbose n can be any number. procmeshes tested are: np = [n], [1, n], [n, 1], [a, d], [d, a] where a * d == n and a d are closest to n** 0.5 """ +from __future__ import print_function + from mpi4py import MPI import itertools import traceback @@ -28,52 +34,46 @@ import argparse import os.path -from sys import path parser = argparse.ArgumentParser(description='Roundtrip testing of pfft', epilog=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) -parser.add_argument('-Nmesh', nargs=3, type=int, - action='append', metavar=('Nx', 'Ny', 'Nz'), +from pfft import * + +oldprint = print +def print(*args, **kwargs): + if MPI.COMM_WORLD.rank == 0: + oldprint(*args, **kwargs) + +parser.add_argument('-Nmesh', nargs='+', type=int, + action='append', help='size of FFT mesh, default is 29 30 31', default=[]) -parser.add_argument('-Nproc', nargs=2, type=int, - action='append', metavar=('Nx', 'Ny'), +parser.add_argument('-Nproc', nargs='+', type=int, + action='append', help='proc mesh', default=[]) -parser.add_argument('-tree', action='store_true', default=False, - help='Use pfft from source tree, ' + - 'built with setup.py build_ext --inplace') parser.add_argument('-diag', action='store_true', default=False, help='show which one failed and which one passed') +parser.add_argument('-rigor', default="estimate", choices=['estimate', 'measure', 'patient', 'exhaustive'], + help='the level of rigor in planning. ') parser.add_argument('-verbose', action='store_true', default=False, help='print which test will be ran') -ns = parser.parse_args() -Nmesh = ns.Nmesh -if len(Nmesh) == 0: - # default - Nmesh = [[29, 30, 31]] -if ns.tree: - # prefers to use the locally built pfft in source tree, in case there is an - # installation - path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from pfft import * - class LargeError(Exception): pass def test_roundtrip_3d(procmesh, type, flags, inplace, Nmesh): + partition = Partition(type, Nmesh, procmesh, flags) for rank in range(MPI.COMM_WORLD.size): MPI.COMM_WORLD.barrier() if rank != procmesh.rank: continue - #print procmesh.rank, 'roundtrip test, np=', procmesh.np, 'Nmesh = ', Nmesh, 'inplace = ', inplace - #print repr(partition) + #oldprint(procmesh.rank, 'roundtrip test, np=', procmesh.np, 'Nmesh = ', Nmesh, 'inplace = ', inplace) + #oldprint(repr(partition)) buf1 = LocalBuffer(partition) if inplace: @@ -96,9 +96,7 @@ def test_roundtrip_3d(procmesh, type, flags, inplace, Nmesh): buf2, type=type, flags=flags) - if procmesh.rank == 0: - #print repr(forward) - pass + # print(repr(forward)) # find the inverse plan typemap = { @@ -139,9 +137,7 @@ def test_roundtrip_3d(procmesh, type, flags, inplace, Nmesh): type=btype, flags=bflags, ) - if procmesh.rank == 0: - #print repr(backward) - pass + #print(repr(backward)) numpy.random.seed(9999) @@ -183,8 +179,7 @@ def test_roundtrip_3d(procmesh, type, flags, inplace, Nmesh): MPI.COMM_WORLD.barrier() if rank != procmesh.rank: continue - if False: - print('error', original - input) + # oldprint('error', original - input) MPI.COMM_WORLD.barrier() if False: print(repr(forward.type), 'forward', "error = ", r2cerr) @@ -193,29 +188,43 @@ def test_roundtrip_3d(procmesh, type, flags, inplace, Nmesh): r2cerr = MPI.COMM_WORLD.allreduce(r2cerr, MPI.MAX) c2rerr = MPI.COMM_WORLD.allreduce(c2rerr, MPI.MAX) if (r2cerr > 5e-4): - raise LargeError("r2c: %g" % r2cerr) + raise LargeError("forward: %g" % r2cerr) if (c2rerr > 5e-4): - raise LargeError("c2r: %g" % c2rerr) - -if MPI.COMM_WORLD.size == 1: - nplist = [ - [1], - [1, 1], - ] -else: - nplist = ns.Nproc - - -try: - flags = [ - Flags.PFFT_ESTIMATE | Flags.PFFT_DESTROY_INPUT, - Flags.PFFT_ESTIMATE | Flags.PFFT_PADDED_R2C | Flags.PFFT_DESTROY_INPUT, - Flags.PFFT_ESTIMATE | Flags.PFFT_PADDED_R2C, - Flags.PFFT_ESTIMATE | Flags.PFFT_TRANSPOSED_OUT, - Flags.PFFT_ESTIMATE | Flags.PFFT_TRANSPOSED_OUT | Flags.PFFT_DESTROY_INPUT, - Flags.PFFT_ESTIMATE | Flags.PFFT_PADDED_R2C | Flags.PFFT_TRANSPOSED_OUT, - ] + raise LargeError("backward: %g" % c2rerr) + +def main(): + + ns = parser.parse_args() + Nmesh = ns.Nmesh + + if len(Nmesh) == 0: + # default + Nmesh = [[29, 30, 31]] + + if MPI.COMM_WORLD.size == 1 and len(ns.Nproc) == 0: + nplist = [ [1], [1, 1], ] + else: + nplist = ns.Nproc + + rigor = { + 'exhaustive': Flags.PFFT_EXHAUSTIVE, + 'patient' : Flags.PFFT_PATIENT, + 'estimate' : Flags.PFFT_ESTIMATE, + 'measure' : Flags.PFFT_MEASURE, + }[ns.rigor] + import itertools + import functools + + flags = [] + matrix = Flags.PFFT_DESTROY_INPUT, Flags.PFFT_PADDED_R2C, Flags.PFFT_TRANSPOSED_OUT + print_flags = functools.reduce(lambda x, y: x | y, matrix, rigor) + + matrix2 = [[0, i] for i in matrix] + for row in itertools.product(*matrix2): + flag = functools.reduce(lambda x, y: x | y, row, rigor) + flags.append(flag) + params = list(itertools.product( nplist, [Type.PFFT_C2C, Type.PFFT_R2C, Type.PFFTF_C2C, Type.PFFTF_R2C], flags, [True, False], Nmesh, @@ -223,11 +232,11 @@ def test_roundtrip_3d(procmesh, type, flags, inplace, Nmesh): PASS = [] FAIL = [] + IMPL = [] for param in params: - if MPI.COMM_WORLD.rank == 0: - if ns.verbose: - f = param - print("NP", f[0], repr(Type(f[1])), repr(Flags(f[2])), "InPlace", f[3], "Nmesh", f[4]) + if ns.verbose: + f = param + print("NP", f[0], repr(Type(f[1])), repr(Flags(f[2])), "InPlace", f[3], "Nmesh", f[4]) np = param[0] procmesh = ProcMesh(np=np) try: @@ -236,19 +245,78 @@ def test_roundtrip_3d(procmesh, type, flags, inplace, Nmesh): except LargeError as e: if ns.verbose: f = param - print("Failed", e) + print("Failed", f, e) FAIL.append((param, e)) + except NotImplementedError as e: + if ns.verbose: + f = param + print("notsupported", f, e) + IMPL.append((param, e)) + + N = len(PASS) + len(FAIL) + len(IMPL) + + print("PASS", len(PASS), '/', N) + + if ns.diag: + printcase("", "", print_flags, header=True) + for f in PASS: + printcase(f, "", print_flags, ) + + print("UNIMPL", len(IMPL), '/', N) + if ns.diag: + printcase("", "", print_flags, header=True) + for f, e in IMPL: + printcase(f, e, print_flags) + + print("FAIL", len(FAIL), '/', N) + if ns.diag: + printcase("", "", print_flags, header=True) + for f, e in FAIL: + printcase(f, e, print_flags) + + if len(FAIL) != 0: + return 1 + + return 0 + +def printcase(f, e, flags, header=False): + if header: + inplace = "INPLACE" + np = "NP" + flags = "FLAGS" + type = "TYPE" + nmesh = "NMESH" + error = "ERROR" + else: + inplace = "INPL" if f[3] else "OUTP" + np = str(f[0]) + flags = Flags(f[2]).format(flags) + type = repr(Type(f[1])) + nmesh = str(f[4]) + error = str(e) + print("%(np)-6s %(nmesh)-8s %(type)-6s %(inplace)-6s %(flags)-80s %(error)-s" % locals()) + +# use unbuffered stdout +class Unbuffered(object): + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def writelines(self, datas): + self.stream.writelines(datas) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + +import sys +sys.stdout = Unbuffered(sys.stdout) + +if __name__ == '__main__': + + try: + sys.exit(main()) + except Exception as e: + print(traceback.format_exc()) + MPI.COMM_WORLD.Abort() - if MPI.COMM_WORLD.rank == 0: - print("PASS", len(PASS), '/', len(params)) - if ns.diag: - for f in PASS: - print("NP", f[0], repr(Type(f[1])), repr(Flags(f[2])), "InPlace", f[3], "Nmesh", f[4]) - print("FAIL", len(FAIL), '/', len(params)) - if ns.diag: - for f, e in FAIL: - print("NP", f[0], repr(Type(f[1])), repr(Flags(f[2])), "InPlace", f[3], "Nmesh", f[4], e) - assert len(FAIL) == 0 -except Exception as e: - print(traceback.format_exc()) - MPI.COMM_WORLD.Abort() diff --git a/setup.py b/setup.py index 00dc827..b7c4d11 100644 --- a/setup.py +++ b/setup.py @@ -104,6 +104,7 @@ def find_version(path): cython_directives = {"embedsignature": True} )]), license='GPL3', + scripts=['scripts/pfft-roundtrip-matrix.py'], cmdclass = { "build_py":build_py, "build_ext": build_ext_subclass}