From dfe7cab9bbc2d317d34611ea1e7c77179eb05874 Mon Sep 17 00:00:00 2001 From: Marcus Date: Sun, 15 Mar 2020 23:31:33 -0400 Subject: [PATCH 01/11] allow visual studio compilation --- S4/RNP/Eigensystems.h | 4 ++++ S4/config.h | 4 ++++ S4/fmm/fft_iface.cpp | 2 +- S4/fmm/fmm_PolBasisJones.cpp | 1 + S4/main_python.c | 6 ++++-- S4/pattern/predicates.c | 2 ++ examples/C_api/spec.awk | 0 7 files changed, 16 insertions(+), 3 deletions(-) mode change 100755 => 100644 examples/C_api/spec.awk diff --git a/S4/RNP/Eigensystems.h b/S4/RNP/Eigensystems.h index eee2560c..f5950b1d 100644 --- a/S4/RNP/Eigensystems.h +++ b/S4/RNP/Eigensystems.h @@ -1,6 +1,10 @@ #ifndef _RNP_EIGENSYSTEMS_H_ #define _RNP_EIGENSYSTEMS_H_ +#ifdef _MSC_VER +#include +#endif + #include #include diff --git a/S4/config.h b/S4/config.h index 87dbdc38..53b55b4b 100644 --- a/S4/config.h +++ b/S4/config.h @@ -8,3 +8,7 @@ #define PACKAGE_URL "" #define PACKAGE_VERSION "1.1.1" #define VERSION "1.1.1" + +#ifdef _MSC_VER +#define strcasecmp _stricmp +#endif \ No newline at end of file diff --git a/S4/fmm/fft_iface.cpp b/S4/fmm/fft_iface.cpp index 187a145e..4bb208d4 100644 --- a/S4/fmm/fft_iface.cpp +++ b/S4/fmm/fft_iface.cpp @@ -20,7 +20,7 @@ #include "fft_iface.h" #include -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 #include #else #include diff --git a/S4/fmm/fmm_PolBasisJones.cpp b/S4/fmm/fmm_PolBasisJones.cpp index e9389ad6..0a95d685 100644 --- a/S4/fmm/fmm_PolBasisJones.cpp +++ b/S4/fmm/fmm_PolBasisJones.cpp @@ -19,6 +19,7 @@ #include +#define _USE_MATH_DEFINES #include #include #include "../RNP/TBLAS.h" diff --git a/S4/main_python.c b/S4/main_python.c index 376f6bbc..536f6965 100644 --- a/S4/main_python.c +++ b/S4/main_python.c @@ -17,6 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + #include "Python.h" #include "numpy/arrayobject.h" @@ -1486,7 +1488,7 @@ static PyObject *S4Sim_GetFieldsOnGridNumpy(S4Sim *self, PyObject *args, PyObjec /* strides[0] = strides[1]; */ /* strides[1] = temp; */ /* PyArray_UpdateFlags(Earr, NPY_ARRAY_UPDATE_ALL); */ - PyArray_ENABLEFLAGS(Earr, NPY_ARRAY_OWNDATA); + PyArray_ENABLEFLAGS((PyArrayObject *)Earr, NPY_ARRAY_OWNDATA); /* PyArray_ENABLEFLAGS(Earr, NPY_ARRAY_F_CONTIGUOUS); */ PyObject *Harr; Harr = PyArray_SimpleNewFromData(3, dims, NPY_COMPLEX128, Hfields); @@ -1497,7 +1499,7 @@ static PyObject *S4Sim_GetFieldsOnGridNumpy(S4Sim *self, PyObject *args, PyObjec /* strides[1] = temp; */ /* PyArray_UpdateFlags(Harr, NPY_ARRAY_UPDATE_ALL); */ - PyArray_ENABLEFLAGS(Harr, NPY_ARRAY_OWNDATA); + PyArray_ENABLEFLAGS((PyArrayObject *)Harr, NPY_ARRAY_OWNDATA); /* Harr->flags |= NPY_OWNDATA */ /* PyArray_ENABLEFLAGS(Harr, NPY_ARRAY_F_CONTIGUOUS); */ diff --git a/S4/pattern/predicates.c b/S4/pattern/predicates.c index e0a65339..e7d1a193 100644 --- a/S4/pattern/predicates.c +++ b/S4/pattern/predicates.c @@ -116,7 +116,9 @@ #include #include #include +#ifndef _MSC_VER #include +#endif /* On some machines, the exact arithmetic routines might be defeated by the */ /* use of internal extended precision floating-point registers. Sometimes */ diff --git a/examples/C_api/spec.awk b/examples/C_api/spec.awk old mode 100755 new mode 100644 From 673954557c9beab3f18df320f56f1ea4e9c5dd26 Mon Sep 17 00:00:00 2001 From: Marcus Date: Mon, 16 Mar 2020 00:34:01 -0400 Subject: [PATCH 02/11] conda recipes for windows and osx (should in principle also work for linux, can't test) --- conda_recipe/s4/Makefile | 182 ++++++++++++++++++++ conda_recipe/s4/bld.bat | 137 +++++++++++++++ conda_recipe/s4/build.sh | 3 + conda_recipe/s4/conda_build_config_old.yaml | 15 ++ conda_recipe/s4/gensetup.py.sh | 39 +++++ conda_recipe/s4/meta.yaml | 73 ++++++++ 6 files changed, 449 insertions(+) create mode 100644 conda_recipe/s4/Makefile create mode 100644 conda_recipe/s4/bld.bat create mode 100644 conda_recipe/s4/build.sh create mode 100644 conda_recipe/s4/conda_build_config_old.yaml create mode 100644 conda_recipe/s4/gensetup.py.sh create mode 100644 conda_recipe/s4/meta.yaml diff --git a/conda_recipe/s4/Makefile b/conda_recipe/s4/Makefile new file mode 100644 index 00000000..ed9c37f0 --- /dev/null +++ b/conda_recipe/s4/Makefile @@ -0,0 +1,182 @@ +CONDA_INC = $(CONDA_PREFIX)/lib/ +CONDA_LIB = $(CONDA_PREFIX)/lib/ + +# BLAS_LIB = -L$(CONDA_LIB) -lmkl_core_dll -lmkl_rt +# LAPACK_LIB = -L$(CONDA_LIB) -lmkl_core_dll -lmkl_rt + +BLAS_LIB = -L$(CONDA_LIB) -lopenblas +LAPACK_LIB = -L$(CONDA_LIB) -lopenblas + +# FFTW3_INC = -I$(CONDA_INC)/include/ +# FFTW3_LIB = -L$(CONDA_LIB) -lfftw3 + +PTHREAD_INC = -DHAVE_UNISTD_H +# PTHREAD_LIB = -lpthread + +CHOLMOD_INC = -I$(CONDA_INC) +CHOLMOD_LIB = -L$(CONDA_LIB) -lcholmod -lamd -lcolamd -lcamd -lccolamd + +BOOST_INC = -I$(CONDA_PREFIX)/include +BOOST_LIBS = -L$(CONDA_LIB) -lboost_serialization + +# Specify custom compilers if needed +CXX = g++ +CC = gcc + +#CFLAGS += -O3 -fPIC +CFLAGS = -Wall -O3 -m64 -msse3 -msse2 -msse -fPIC + +OPTFLAGS = -O3 + +OBJDIR = ./build +S4_BINNAME = $(OBJDIR)/S4 +S4_LIBNAME = $(OBJDIR)/libS4.a +S4r_LIBNAME = $(OBJDIR)/libS4r.a + +CPPFLAGS = -Wall -I. -IS4 -IS4/RNP -IS4/kiss_fft + +ifdef BOOST_INC + CPPFLAGS += $(BOOST_INC) $(BOOST_LIBS) +endif + +ifdef BLAS_LIB +CPPFLAGS += -DHAVE_BLAS +endif + +ifdef LAPACK_LIB +CPPFLAGS += -DHAVE_LAPACK +endif + +ifdef FFTW3_LIB +CPPFLAGS += -DHAVE_FFTW3 $(FFTW3_INC) +endif + +ifdef PTHREAD_LIB +CPPFLAGS += -DHAVE_LIBPTHREAD $(PTHREAD_INC) +endif + +ifdef CHOLMOD_LIB +CPPFLAGS += -DHAVE_LIBCHOLMOD $(CHOLMOD_INC) +endif + +ifdef MPI_LIB +CPPFLAGS += -DHAVE_MPI $(MPI_INC) +endif + +LIBS = $(BLAS_LIB) $(LAPACK_LIB) $(FFTW3_LIB) $(PTHREAD_LIB) $(CHOLMOD_LIB) $(MPI_LIB) $(BOOST_LIBS) + +#### Compilation targets + +all: $(S4_LIBNAME) + +objdir: + mkdir -p $(OBJDIR) + mkdir -p $(OBJDIR)/S4k + mkdir -p $(OBJDIR)/S4r + mkdir -p $(OBJDIR)/modules + +S4_LIBOBJS = \ + $(OBJDIR)/S4k/S4.o \ + $(OBJDIR)/S4k/rcwa.o \ + $(OBJDIR)/S4k/fmm_common.o \ + $(OBJDIR)/S4k/fmm_FFT.o \ + $(OBJDIR)/S4k/fmm_kottke.o \ + $(OBJDIR)/S4k/fmm_closed.o \ + $(OBJDIR)/S4k/fmm_PolBasisNV.o \ + $(OBJDIR)/S4k/fmm_PolBasisVL.o \ + $(OBJDIR)/S4k/fmm_PolBasisJones.o \ + $(OBJDIR)/S4k/fmm_experimental.o \ + $(OBJDIR)/S4k/fft_iface.o \ + $(OBJDIR)/S4k/pattern.o \ + $(OBJDIR)/S4k/intersection.o \ + $(OBJDIR)/S4k/predicates.o \ + $(OBJDIR)/S4k/numalloc.o \ + $(OBJDIR)/S4k/gsel.o \ + $(OBJDIR)/S4k/sort.o \ + $(OBJDIR)/S4k/kiss_fft.o \ + $(OBJDIR)/S4k/kiss_fftnd.o \ + $(OBJDIR)/S4k/SpectrumSampler.o \ + $(OBJDIR)/S4k/cubature.o \ + $(OBJDIR)/S4k/Interpolator.o \ + $(OBJDIR)/S4k/convert.o + +S4r_LIBOBJS = \ + $(OBJDIR)/S4r/Material.o \ + $(OBJDIR)/S4r/LatticeGridRect.o \ + $(OBJDIR)/S4r/LatticeGridArb.o \ + $(OBJDIR)/S4r/POFF2Mesh.o \ + $(OBJDIR)/S4r/PeriodicMesh.o \ + $(OBJDIR)/S4r/Shape.o \ + $(OBJDIR)/S4r/Simulation.o \ + $(OBJDIR)/S4r/Layer.o \ + $(OBJDIR)/S4r/Pseudoinverse.o \ + $(OBJDIR)/S4r/Eigensystems.o \ + $(OBJDIR)/S4r/IRA.o \ + $(OBJDIR)/S4r/intersection.o \ + $(OBJDIR)/S4r/predicates.o \ + $(OBJDIR)/S4r/periodic_off2.o + +ifndef LAPACK_LIB + S4_LIBOBJS += $(OBJDIR)/S4k/Eigensystems.o +endif + +$(S4_LIBNAME): objdir $(S4_LIBOBJS) + $(AR) crvs $@ $(S4_LIBOBJS) + +$(OBJDIR)/S4k/S4.o: S4/S4.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/rcwa.o: S4/rcwa.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_common.o: S4/fmm/fmm_common.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_FFT.o: S4/fmm/fmm_FFT.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_kottke.o: S4/fmm/fmm_kottke.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_closed.o: S4/fmm/fmm_closed.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_PolBasisNV.o: S4/fmm/fmm_PolBasisNV.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_PolBasisVL.o: S4/fmm/fmm_PolBasisVL.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_PolBasisJones.o: S4/fmm/fmm_PolBasisJones.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fmm_experimental.o: S4/fmm/fmm_experimental.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/fft_iface.o: S4/fmm/fft_iface.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/pattern.o: S4/pattern/pattern.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/intersection.o: S4/pattern/intersection.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/predicates.o: S4/pattern/predicates.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/numalloc.o: S4/numalloc.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/gsel.o: S4/gsel.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/sort.o: S4/sort.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/kiss_fft.o: S4/kiss_fft/kiss_fft.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/kiss_fftnd.o: S4/kiss_fft/tools/kiss_fftnd.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/SpectrumSampler.o: S4/SpectrumSampler.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/cubature.o: S4/cubature.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/Interpolator.o: S4/Interpolator.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/convert.o: S4/convert.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ +$(OBJDIR)/S4k/Eigensystems.o: S4/RNP/Eigensystems.cpp + $(CXX) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ + +#### Python extension + +S4_pyext: objdir $(S4_LIBNAME) + sh conda_recipe/s4/gensetup.py.sh $(OBJDIR) $(S4_LIBNAME) "$(LIBS)" $(CONDA_LIB) + $(PYTHON) setup.py install + +clean: + rm -rf $(OBJDIR) diff --git a/conda_recipe/s4/bld.bat b/conda_recipe/s4/bld.bat new file mode 100644 index 00000000..0b305cdc --- /dev/null +++ b/conda_recipe/s4/bld.bat @@ -0,0 +1,137 @@ +@echo off + +setlocal EnableDelayedExpansion + +::if "%PY_VER%"=="2.7" ( +:: set MSVC_VER=9.0 +:: set LIB_VER=90 +::) else if "%PY_VER%"=="3.4" ( +:: set MSVC_VER=10.0 +:: set LIB_VER=100 +::) else ( +:: set MSVC_VER=14.0 +:: set LIB_VER=140 +::) + +CALL "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 + +set "BLAS_PATH=%CONDA_PREFIX%/Library/lib/" +set "BLAS_LIB=mkl_core_dll mkl_rt" +set "LAPACK_PATH=%CONDA_PREFIX%/Library/lib/" +set "LAPACK_LIB=mkl_core_dll mkl_rt" +::set "BLAS_PATH=%CONDA_PREFIX%/Library/lib/" +::set "BLAS_LIB=openblas" +::set "LAPACK_PATH=%CONDA_PREFIX%/Library/lib/" +::set "LAPACK_LIB=openblas" + +::set "FFTW3_INC=-I%CONDA_PREFIX%/include/" +::set "FFTW3_PATH=%CONDA_PREFIX%/Library/lib/" +::set "FFTW3_LIB=fftw3" + +::set "PTHREAD_PATH=%CONDA_PREFIX%/Library/lib/" +::set "PTHREAD_LIB=pthread" + +set "CHOLMOD_INC=-I%CONDA_PREFIX%\Library\include\suitesparse" +set "CHOLMOD_PATH=%CONDA_PREFIX%\Library\lib\" +set "CHOLMOD_LIB=cholmod amd colamd camd ccolamd suitesparseconfig metis" + +set "BOOST_INC=-I%CONDA_PREFIX%\Library\include\" +set "BOOST_PATH=%CONDA_PREFIX%\Library\lib\" +set "BOOST_LIB=" + +:: Specify custom compilers if needed +set "CXX=cl /O2" +set "CC=cl /O2" + +set "OBJDIR=build" +set "S4_BINNAME=%OBJDIR%/S4" +set "S4_LIBNAME=%OBJDIR%/S4.lib" + +set "CPPFLAGS=-EHsc -MD -I. -IS4 -IS4/RNP -IS4/kiss_fft" +set "CPPFLAGS=%CPPFLAGS% %BOOST_INC%" +set "CPPFLAGS=%CPPFLAGS% -DHAVE_BLAS" +set "CPPFLAGS=%CPPFLAGS% -DHAVE_LAPACK" +::set "CPPFLAGS=%CPPFLAGS% -DHAVE_FFTW3 %FFTW3_INC%" +:: set "CPPFLAGS=%CPPFLAGS% -DHAVE_LIBPTHREAD %PTHREAD_INC%" +set "CPPFLAGS=%CPPFLAGS% -DHAVE_LIBCHOLMOD %CHOLMOD_INC%" + +set "LIBS=%BLAS_LIB% %LAPACK_LIB% %FFTW3_LIB% %PTHREAD_LIB% %CHOLMOD_LIB% %BOOST_LIB%" +set "LIBPATHS=%BLAS_PATH% %LAPACK_PATH% %FFTW3_PATH% %PTHREAD_PATH% %CHOLMOD_PATH% %BOOST_PATH%" +set "S4_LIBOBJS=%OBJDIR%/S4k/S4.obj %OBJDIR%/S4k/rcwa.obj %OBJDIR%/S4k/fmm_common.obj %OBJDIR%/S4k/fmm_FFT.obj %OBJDIR%/S4k/fmm_kottke.obj %OBJDIR%/S4k/fmm_closed.obj %OBJDIR%/S4k/fmm_PolBasisNV.obj %OBJDIR%/S4k/fmm_PolBasisVL.obj %OBJDIR%/S4k/fmm_PolBasisJones.obj %OBJDIR%/S4k/fmm_experimental.obj %OBJDIR%/S4k/fft_iface.obj %OBJDIR%/S4k/pattern.obj %OBJDIR%/S4k/intersection.obj %OBJDIR%/S4k/predicates.obj %OBJDIR%/S4k/numalloc.obj %OBJDIR%/S4k/gsel.obj %OBJDIR%/S4k/sort.obj %OBJDIR%/S4k/kiss_fft.obj %OBJDIR%/S4k/kiss_fftnd.obj %OBJDIR%/S4k/SpectrumSampler.obj %OBJDIR%/S4k/cubature.obj %OBJDIR%/S4k/Interpolator.obj %OBJDIR%/S4k/convert.obj" + +:: Make a build folder and change to it. +::cd S4 +::cd S4 +rmdir /Q /s %OBJDIR% +:: +mkdir %OBJDIR% +cd %OBJDIR% +mkdir S4k +mkdir S4r +mkdir modules +cd .. +%CXX% -c %CPPFLAGS% S4/S4.cpp -Fo%OBJDIR%/S4k/S4.obj +%CXX% -c %CPPFLAGS% S4/rcwa.cpp -Fo%OBJDIR%/S4k/rcwa.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_common.cpp -Fo%OBJDIR%/S4k/fmm_common.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_FFT.cpp -Fo%OBJDIR%/S4k/fmm_FFT.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_kottke.cpp -Fo%OBJDIR%/S4k/fmm_kottke.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_closed.cpp -Fo%OBJDIR%/S4k/fmm_closed.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_PolBasisNV.cpp -Fo%OBJDIR%/S4k/fmm_PolBasisNV.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_PolBasisVL.cpp -Fo%OBJDIR%/S4k/fmm_PolBasisVL.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_PolBasisJones.cpp -Fo%OBJDIR%/S4k/fmm_PolBasisJones.obj +%CXX% -c %CPPFLAGS% S4/fmm/fmm_experimental.cpp -Fo%OBJDIR%/S4k/fmm_experimental.obj +%CXX% -c %CPPFLAGS% S4/fmm/fft_iface.cpp -Fo%OBJDIR%/S4k/fft_iface.obj +%CC% -c %CPPFLAGS% S4/pattern/pattern.c -Fo%OBJDIR%/S4k/pattern.obj +%CC% -c %CPPFLAGS% S4/pattern/intersection.c -Fo%OBJDIR%/S4k/intersection.obj +%CC% -c %CPPFLAGS% S4/pattern/predicates.c -Fo%OBJDIR%/S4k/predicates.obj +%CC% -c %CPPFLAGS% S4/numalloc.c -Fo%OBJDIR%/S4k/numalloc.obj +%CC% -c %CPPFLAGS% S4/gsel.c -Fo%OBJDIR%/S4k/gsel.obj +%CC% -c %CPPFLAGS% S4/sort.c -Fo%OBJDIR%/S4k/sort.obj +%CC% -c %CPPFLAGS% S4/kiss_fft/kiss_fft.c -Fo%OBJDIR%/S4k/kiss_fft.obj +%CC% -c %CPPFLAGS% S4/kiss_fft/tools/kiss_fftnd.c -Fo%OBJDIR%/S4k/kiss_fftnd.obj +%CC% -c %CPPFLAGS% S4/SpectrumSampler.c -Fo%OBJDIR%/S4k/SpectrumSampler.obj +%CC% -c %CPPFLAGS% S4/cubature.c -Fo%OBJDIR%/S4k/cubature.obj +%CC% -c %CPPFLAGS% S4/Interpolator.c -Fo%OBJDIR%/S4k/Interpolator.obj +%CC% -c %CPPFLAGS% S4/convert.c -Fo%OBJDIR%/S4k/convert.obj +%CXX% -c %CPPFLAGS% S4/RNP/Eigensystems.cpp -Fo%OBJDIR%/S4k/Eigensystems.obj + +@echo on +lib.exe %OBJDIR%/S4k/* /out:%S4_LIBNAME% + + + +set "LIBS=%LIBS:\=/%" +set "LIBPATHS=%LIBPATHS:\=/%" +set "OBJDIR=%OBJDIR:\=/%" +set "S4_LIBNAME=%S4_LIBNAME:\=/%" + +(echo:from distutils.core import setup, Extension +echo:import numpy as np +echo: +echo:lib_dirs = ['%OBJDIR%'] +echo:lib_dirs.extend^([libpath for libpath in '%LIBPATHS%'.split^(^)]^) +echo:libs = ['S4'] +echo:libs.extend^([lib for lib in '%LIBS%'.split^(^)]^) +echo:include_dirs = [np.get_include^(^)] +echo:extra_compile_args = ["-O2"] +echo:print^(lib_dirs^) +echo:print^(libs^) +echo:print^(include_dirs^) +echo:S4module = Extension^('S4', +echo: sources = ['S4/main_python.c'], +echo: libraries = libs, +echo: library_dirs = lib_dirs, +echo: include_dirs = include_dirs, +echo: extra_objects = ['%S4_LIBNAME%'], +echo: extra_compile_args = extra_compile_args +echo:^) +echo: +echo:setup^(name = 'S4', +echo: version = '1.1', +echo: description = 'Stanford Stratified Structure Solver ^(S4^): Fourier Modal Method', +echo: ext_modules = [S4module] +echo:^))>setup.py + +IF DEFINED PYTHON ("%PYTHON%" setup.py install) ELSE (python setup.py install) + + diff --git a/conda_recipe/s4/build.sh b/conda_recipe/s4/build.sh new file mode 100644 index 00000000..55382e3d --- /dev/null +++ b/conda_recipe/s4/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash +make -f conda_recipe/s4/Makefile clean +make -f conda_recipe/s4/Makefile S4_pyext \ No newline at end of file diff --git a/conda_recipe/s4/conda_build_config_old.yaml b/conda_recipe/s4/conda_build_config_old.yaml new file mode 100644 index 00000000..162b710e --- /dev/null +++ b/conda_recipe/s4/conda_build_config_old.yaml @@ -0,0 +1,15 @@ +python: + - 3.5 + - 3.6 + - 3.7 + - 3.8 + +numpy: + - 1.11 + - 1.12 + - 1.13 + - 1.14 + - 1.15 + - 1.16 + - 1.17 + - 1.18 \ No newline at end of file diff --git a/conda_recipe/s4/gensetup.py.sh b/conda_recipe/s4/gensetup.py.sh new file mode 100644 index 00000000..88d9a459 --- /dev/null +++ b/conda_recipe/s4/gensetup.py.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +OBJDIR="$1" +LIBFILE="$2" +LIBS="$3" +BOOST_LIBS="$4" + +echo "LIBFILE: $LIBFILE" + +cat < setup.py +from distutils.core import setup, Extension +import numpy as np +#import os +#os.environ["CC"] = "g++" +#os.environ["CXX"] = "g++" + +libs = ['S4', 'stdc++'] +lib_dirs = ['$OBJDIR', '$BOOST_LIBS'] +libs.extend([lib[2::] for lib in '$LIBS'.split()]) +include_dirs = ['$BOOST_PREFIX/include', np.get_include()] +extra_link_args = ['$LIBFILE'] + +S4module = Extension('S4', + sources = ['S4/main_python.c'], + libraries = libs, + library_dirs = lib_dirs, + include_dirs = include_dirs, + extra_objects = ['$LIBFILE'], + # extra_link_args = extra_link_args, + runtime_library_dirs=['$BOOST_PREFIX/lib'], + extra_compile_args=['-std=gnu99'] +) + +setup(name = 'S4', + version = '1.1', + description = 'Stanford Stratified Structure Solver (S4): Fourier Modal Method', + ext_modules = [S4module] +) +SETUPPY diff --git a/conda_recipe/s4/meta.yaml b/conda_recipe/s4/meta.yaml new file mode 100644 index 00000000..d7197070 --- /dev/null +++ b/conda_recipe/s4/meta.yaml @@ -0,0 +1,73 @@ + +package: + name: s4 + version: 1.1 + +source: + path: ../../ + #url: https://github.com/simplejson/simplejson/releases/download/{{ version }}/simplejson-{{ version }}.tar.gz + # and otherwise fall back to archive: + #sha256: d58439c548433adcda98e695be53e526ba940a4b9c44fb9a05d92cd495cdd47f + # sha256 is the preferred checksum -- you can get it for a file with: + # `openssl sha256 `. + +build: + number: 0 + skip: True # [py<37] + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - python {{ python }} + - setuptools + - numpy {{ numpy }} + - boost + - suitesparse + - mkl-devel [win] + - openblas [unix] + + host: + - python {{ python }} + - numpy {{ numpy }} + - boost + - suitesparse + - mkl-devel [win] + - openblas [unix] + + run: + - python {{ python }} + - {{ pin_compatible('numpy') }} + - {{ pin_compatible('boost') }} + - {{ pin_compatible('suitesparse') }} + - {{ pin_compatible('mkl-devel') }} [win] + - {{ pin_compatible('openblas') }} [unix] + +test: + imports: + - S4 + +about: + home: https://web.stanford.edu/group/fan/S4/index.html + license: GPL-2.0 + license_family: GPL + # license_file: LICENSE.txt + summary: 'S4 (Stanford Stratified Structure Solver) Conda Package' + + # The remaining entries in this section are optional, but recommended. + description: | + A program for computing electromagnetic fields in periodic, layered + structures, developed by Victor Liu (victorliu@alumni.stanford.edu) of the + Fan group in the Stanford Electrical Engineering Department. + See the S4 manual, in doc/index.html, for a complete + description of the package and its user interface, as well as + installation instructions, the license and copyright, contact + addresses, and other important information. + doc_url: https://web.stanford.edu/group/fan/S4/index.html + dev_url: https://web.stanford.edu/group/fan/S4/index.html + +extra: + recipe-maintainers: + # GitHub IDs for maintainers of the recipe. + # Always check with the people listed below if they are OK becoming maintainers of the recipe. (There will be spam!) + - marcus-o \ No newline at end of file From 64d2d2505cddbcf41f0aa2ea6b47c75e6e98e9aa Mon Sep 17 00:00:00 2001 From: marcus-o <33861898+marcus-o@users.noreply.github.com> Date: Mon, 16 Mar 2020 00:45:54 -0400 Subject: [PATCH 03/11] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c0b42aa..4dca6bf6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ -# Detailed installation instructions (64-bit Ubuntu 16 or 18): +# Installation instructions: +In principle you should be able to install this on Windows and OS X using conda and 'conda install -c marcus-o s4'. +Install in a new environment to avoid incompatabilities. +I don't have a large test base, so I would appreciate if you let me know if it works (marcus.ossiander at gmail) or not, then I'll try to help). +# Detailed installation instructions (64-bit Ubuntu 16 or 18): ## Key steps: ``` From 1b7d8ef24e665d74e007677ed292359a01f271ef Mon Sep 17 00:00:00 2001 From: marcus-o <33861898+marcus-o@users.noreply.github.com> Date: Mon, 16 Mar 2020 00:50:15 -0400 Subject: [PATCH 04/11] Update meta.yaml --- conda_recipe/s4/meta.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/conda_recipe/s4/meta.yaml b/conda_recipe/s4/meta.yaml index d7197070..17e36613 100644 --- a/conda_recipe/s4/meta.yaml +++ b/conda_recipe/s4/meta.yaml @@ -5,11 +5,6 @@ package: source: path: ../../ - #url: https://github.com/simplejson/simplejson/releases/download/{{ version }}/simplejson-{{ version }}.tar.gz - # and otherwise fall back to archive: - #sha256: d58439c548433adcda98e695be53e526ba940a4b9c44fb9a05d92cd495cdd47f - # sha256 is the preferred checksum -- you can get it for a file with: - # `openssl sha256 `. build: number: 0 @@ -68,6 +63,4 @@ about: extra: recipe-maintainers: - # GitHub IDs for maintainers of the recipe. - # Always check with the people listed below if they are OK becoming maintainers of the recipe. (There will be spam!) - - marcus-o \ No newline at end of file + - marcus-o From 4ad000af4fc6501aeceb62799e30250ab512abd0 Mon Sep 17 00:00:00 2001 From: marcus-o <33861898+marcus-o@users.noreply.github.com> Date: Mon, 16 Mar 2020 00:53:46 -0400 Subject: [PATCH 05/11] Update meta.yaml --- conda_recipe/s4/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda_recipe/s4/meta.yaml b/conda_recipe/s4/meta.yaml index 17e36613..0e78d74f 100644 --- a/conda_recipe/s4/meta.yaml +++ b/conda_recipe/s4/meta.yaml @@ -51,6 +51,8 @@ about: # The remaining entries in this section are optional, but recommended. description: | + These are conda packages of the below software. + Victor Liu wrote the initial code but is not responsible for these packages. A program for computing electromagnetic fields in periodic, layered structures, developed by Victor Liu (victorliu@alumni.stanford.edu) of the Fan group in the Stanford Electrical Engineering Department. From 3c05083100f4a975a95834c0b239d09016d8524d Mon Sep 17 00:00:00 2001 From: marcus-o <33861898+marcus-o@users.noreply.github.com> Date: Mon, 16 Mar 2020 16:37:34 -0400 Subject: [PATCH 06/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4dca6bf6..7f4f7bce 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Installation instructions: -In principle you should be able to install this on Windows and OS X using conda and 'conda install -c marcus-o s4'. +In principle you should be able to install this on Windows / OS X / Linux using conda and 'conda install -c marcus-o s4'. Install in a new environment to avoid incompatabilities. I don't have a large test base, so I would appreciate if you let me know if it works (marcus.ossiander at gmail) or not, then I'll try to help). From 8b70e5aa1b65bdf915796066c0e747b1549aa7c9 Mon Sep 17 00:00:00 2001 From: marcus-o <33861898+marcus-o@users.noreply.github.com> Date: Thu, 19 Mar 2020 11:49:18 -0400 Subject: [PATCH 07/11] Delete conda_build_config_old.yaml --- conda_recipe/s4/conda_build_config_old.yaml | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 conda_recipe/s4/conda_build_config_old.yaml diff --git a/conda_recipe/s4/conda_build_config_old.yaml b/conda_recipe/s4/conda_build_config_old.yaml deleted file mode 100644 index 162b710e..00000000 --- a/conda_recipe/s4/conda_build_config_old.yaml +++ /dev/null @@ -1,15 +0,0 @@ -python: - - 3.5 - - 3.6 - - 3.7 - - 3.8 - -numpy: - - 1.11 - - 1.12 - - 1.13 - - 1.14 - - 1.15 - - 1.16 - - 1.17 - - 1.18 \ No newline at end of file From a3d46a6e255a90c2e4cae01ec5bbc8f9fdf9e172 Mon Sep 17 00:00:00 2001 From: Marcus Date: Thu, 2 Apr 2020 17:46:41 -0400 Subject: [PATCH 08/11] revert main_python.c and changes to conda recipe --- S4/main_python.c | 6 ++---- conda_recipe/s4/bld.bat | 14 +++++++------- conda_recipe/s4/conda_build_config.yaml | 5 +++++ conda_recipe/s4/meta.yaml | 24 +++++++++++++----------- 4 files changed, 27 insertions(+), 22 deletions(-) create mode 100644 conda_recipe/s4/conda_build_config.yaml diff --git a/S4/main_python.c b/S4/main_python.c index 536f6965..376f6bbc 100644 --- a/S4/main_python.c +++ b/S4/main_python.c @@ -17,8 +17,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION - #include "Python.h" #include "numpy/arrayobject.h" @@ -1488,7 +1486,7 @@ static PyObject *S4Sim_GetFieldsOnGridNumpy(S4Sim *self, PyObject *args, PyObjec /* strides[0] = strides[1]; */ /* strides[1] = temp; */ /* PyArray_UpdateFlags(Earr, NPY_ARRAY_UPDATE_ALL); */ - PyArray_ENABLEFLAGS((PyArrayObject *)Earr, NPY_ARRAY_OWNDATA); + PyArray_ENABLEFLAGS(Earr, NPY_ARRAY_OWNDATA); /* PyArray_ENABLEFLAGS(Earr, NPY_ARRAY_F_CONTIGUOUS); */ PyObject *Harr; Harr = PyArray_SimpleNewFromData(3, dims, NPY_COMPLEX128, Hfields); @@ -1499,7 +1497,7 @@ static PyObject *S4Sim_GetFieldsOnGridNumpy(S4Sim *self, PyObject *args, PyObjec /* strides[1] = temp; */ /* PyArray_UpdateFlags(Harr, NPY_ARRAY_UPDATE_ALL); */ - PyArray_ENABLEFLAGS((PyArrayObject *)Harr, NPY_ARRAY_OWNDATA); + PyArray_ENABLEFLAGS(Harr, NPY_ARRAY_OWNDATA); /* Harr->flags |= NPY_OWNDATA */ /* PyArray_ENABLEFLAGS(Harr, NPY_ARRAY_F_CONTIGUOUS); */ diff --git a/conda_recipe/s4/bld.bat b/conda_recipe/s4/bld.bat index 0b305cdc..4d6b4e14 100644 --- a/conda_recipe/s4/bld.bat +++ b/conda_recipe/s4/bld.bat @@ -13,7 +13,9 @@ setlocal EnableDelayedExpansion :: set LIB_VER=140 ::) -CALL "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 +set DISTUTILS_USE_SDK=1 +::CALL "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 +CALL "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build/vcvarsall.bat" amd64 -vcvars_ver=14.0 set "BLAS_PATH=%CONDA_PREFIX%/Library/lib/" set "BLAS_LIB=mkl_core_dll mkl_rt" @@ -40,14 +42,14 @@ set "BOOST_PATH=%CONDA_PREFIX%\Library\lib\" set "BOOST_LIB=" :: Specify custom compilers if needed -set "CXX=cl /O2" -set "CC=cl /O2" +set "CXX=cl /O2 " +set "CC=cl /O2 " set "OBJDIR=build" set "S4_BINNAME=%OBJDIR%/S4" set "S4_LIBNAME=%OBJDIR%/S4.lib" -set "CPPFLAGS=-EHsc -MD -I. -IS4 -IS4/RNP -IS4/kiss_fft" +set "CPPFLAGS=-EHa -MD -I. -IS4 -IS4/RNP -IS4/kiss_fft" set "CPPFLAGS=%CPPFLAGS% %BOOST_INC%" set "CPPFLAGS=%CPPFLAGS% -DHAVE_BLAS" set "CPPFLAGS=%CPPFLAGS% -DHAVE_LAPACK" @@ -57,7 +59,6 @@ set "CPPFLAGS=%CPPFLAGS% -DHAVE_LIBCHOLMOD %CHOLMOD_INC%" set "LIBS=%BLAS_LIB% %LAPACK_LIB% %FFTW3_LIB% %PTHREAD_LIB% %CHOLMOD_LIB% %BOOST_LIB%" set "LIBPATHS=%BLAS_PATH% %LAPACK_PATH% %FFTW3_PATH% %PTHREAD_PATH% %CHOLMOD_PATH% %BOOST_PATH%" -set "S4_LIBOBJS=%OBJDIR%/S4k/S4.obj %OBJDIR%/S4k/rcwa.obj %OBJDIR%/S4k/fmm_common.obj %OBJDIR%/S4k/fmm_FFT.obj %OBJDIR%/S4k/fmm_kottke.obj %OBJDIR%/S4k/fmm_closed.obj %OBJDIR%/S4k/fmm_PolBasisNV.obj %OBJDIR%/S4k/fmm_PolBasisVL.obj %OBJDIR%/S4k/fmm_PolBasisJones.obj %OBJDIR%/S4k/fmm_experimental.obj %OBJDIR%/S4k/fft_iface.obj %OBJDIR%/S4k/pattern.obj %OBJDIR%/S4k/intersection.obj %OBJDIR%/S4k/predicates.obj %OBJDIR%/S4k/numalloc.obj %OBJDIR%/S4k/gsel.obj %OBJDIR%/S4k/sort.obj %OBJDIR%/S4k/kiss_fft.obj %OBJDIR%/S4k/kiss_fftnd.obj %OBJDIR%/S4k/SpectrumSampler.obj %OBJDIR%/S4k/cubature.obj %OBJDIR%/S4k/Interpolator.obj %OBJDIR%/S4k/convert.obj" :: Make a build folder and change to it. ::cd S4 @@ -93,13 +94,12 @@ cd .. %CC% -c %CPPFLAGS% S4/cubature.c -Fo%OBJDIR%/S4k/cubature.obj %CC% -c %CPPFLAGS% S4/Interpolator.c -Fo%OBJDIR%/S4k/Interpolator.obj %CC% -c %CPPFLAGS% S4/convert.c -Fo%OBJDIR%/S4k/convert.obj -%CXX% -c %CPPFLAGS% S4/RNP/Eigensystems.cpp -Fo%OBJDIR%/S4k/Eigensystems.obj +IF DEFINED LAPACK_LIB (echo "using lapack") ELSE (%CXX% -c %CPPFLAGS% S4/RNP/Eigensystems.cpp -Fo%OBJDIR%/S4k/Eigensystems.obj) @echo on lib.exe %OBJDIR%/S4k/* /out:%S4_LIBNAME% - set "LIBS=%LIBS:\=/%" set "LIBPATHS=%LIBPATHS:\=/%" set "OBJDIR=%OBJDIR:\=/%" diff --git a/conda_recipe/s4/conda_build_config.yaml b/conda_recipe/s4/conda_build_config.yaml new file mode 100644 index 00000000..07b65fe8 --- /dev/null +++ b/conda_recipe/s4/conda_build_config.yaml @@ -0,0 +1,5 @@ +python: + - 3.7 + - 3.8 +numpy: + - 1.18 \ No newline at end of file diff --git a/conda_recipe/s4/meta.yaml b/conda_recipe/s4/meta.yaml index d7197070..a05bb6a1 100644 --- a/conda_recipe/s4/meta.yaml +++ b/conda_recipe/s4/meta.yaml @@ -12,35 +12,34 @@ source: # `openssl sha256 `. build: - number: 0 - skip: True # [py<37] + number: 2 requirements: build: - {{ compiler('c') }} - {{ compiler('cxx') }} - - python {{ python }} + - python - setuptools - - numpy {{ numpy }} - - boost + - numpy + - libboost - suitesparse - mkl-devel [win] - openblas [unix] host: - - python {{ python }} - - numpy {{ numpy }} - - boost + - python + - numpy + - libboost - suitesparse - mkl-devel [win] - openblas [unix] run: - - python {{ python }} + - python - {{ pin_compatible('numpy') }} - - {{ pin_compatible('boost') }} +# - {{ pin_compatible('boost') }} - {{ pin_compatible('suitesparse') }} - - {{ pin_compatible('mkl-devel') }} [win] + - {{ pin_compatible('mkl') }} [win] - {{ pin_compatible('openblas') }} [unix] test: @@ -56,6 +55,9 @@ about: # The remaining entries in this section are optional, but recommended. description: | + These are conda packages of the below software. Victor Liu wrote the + initial code but is not responsible for these packages. + A program for computing electromagnetic fields in periodic, layered structures, developed by Victor Liu (victorliu@alumni.stanford.edu) of the Fan group in the Stanford Electrical Engineering Department. From 4858894d625a525e4c741b8e9776fbde0a5d0c27 Mon Sep 17 00:00:00 2001 From: marcus-o Date: Fri, 17 Apr 2020 12:32:11 -0400 Subject: [PATCH 09/11] a sign in fmm_polbasisVL.cpp caused a difference between fftw3 and kissfft the compile flag for fftw3 in fft_iface.cpp was wrong so fftw3 was never used there is no speed difference b/w kissfft and mkl fftw in my case --- S4/fmm/fft_iface.cpp | 14 ++++++------ S4/fmm/fmm_PolBasisVL.cpp | 2 +- conda_recipe/s4/bld.bat | 45 +++++++++++++++++++++------------------ 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/S4/fmm/fft_iface.cpp b/S4/fmm/fft_iface.cpp index 4bb208d4..e354e02d 100644 --- a/S4/fmm/fft_iface.cpp +++ b/S4/fmm/fft_iface.cpp @@ -46,7 +46,7 @@ int fft_next_fast_size(int n){ } std::complex *fft_alloc_complex(size_t n){ -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 return (std::complex*)(fftw_complex*)fftw_malloc(sizeof(fftw_complex) * n); #else return (std::complex*)KISS_FFT_MALLOC(sizeof(std::complex) * n); @@ -54,7 +54,7 @@ std::complex *fft_alloc_complex(size_t n){ } void fft_free(void *p){ -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 fftw_free(p); #else KISS_FFT_FREE(p); @@ -62,7 +62,7 @@ void fft_free(void *p){ } struct tag_fft_plan{ -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 fftw_plan plan; #else kiss_fftnd_cfg cfg; @@ -76,7 +76,7 @@ fft_plan fft_plan_dft_2d( int sign ){ fft_plan plan = NULL; -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 # ifdef HAVE_LIBPTHREAD pthread_mutex_lock(&mutex); # endif @@ -103,7 +103,7 @@ fft_plan fft_plan_dft_2d( } void fft_plan_exec(const fft_plan plan){ -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 fftw_execute(plan->plan); #else kiss_fftnd(plan->cfg, (const kiss_fft_cpx *)plan->in, (kiss_fft_cpx *)plan->out); @@ -112,7 +112,7 @@ void fft_plan_exec(const fft_plan plan){ void fft_plan_destroy(fft_plan plan){ if(NULL == plan){ return; } -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 # ifdef HAVE_LIBPTHREAD pthread_mutex_lock(&mutex); # endif @@ -135,7 +135,7 @@ void fft_init(){ } void fft_destroy(){ -#ifdef HAVE_LIBFFTW3 +#ifdef HAVE_FFTW3 fftw_cleanup(); #endif #ifdef HAVE_LIBPTHREAD diff --git a/S4/fmm/fmm_PolBasisVL.cpp b/S4/fmm/fmm_PolBasisVL.cpp index c84f8b50..de9e2afa 100644 --- a/S4/fmm/fmm_PolBasisVL.cpp +++ b/S4/fmm/fmm_PolBasisVL.cpp @@ -238,7 +238,7 @@ int FMMGetEpsilon_PolBasisVL(const Simulation *S, const Layer *L, const int n, s } - fft_plan plan = fft_plan_dft_2d(ngrid, Ffrom, Fto, -1); + fft_plan plan = fft_plan_dft_2d(ngrid, Ffrom, Fto, 1); // We fill in the quarter blocks of F in Fortran order for(int w = 0; w < 4; ++w){ diff --git a/conda_recipe/s4/bld.bat b/conda_recipe/s4/bld.bat index 4d6b4e14..75fb11f9 100644 --- a/conda_recipe/s4/bld.bat +++ b/conda_recipe/s4/bld.bat @@ -18,17 +18,13 @@ set DISTUTILS_USE_SDK=1 CALL "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build/vcvarsall.bat" amd64 -vcvars_ver=14.0 set "BLAS_PATH=%CONDA_PREFIX%/Library/lib/" -set "BLAS_LIB=mkl_core_dll mkl_rt" +set "BLAS_LIB=mkl_rt" set "LAPACK_PATH=%CONDA_PREFIX%/Library/lib/" -set "LAPACK_LIB=mkl_core_dll mkl_rt" -::set "BLAS_PATH=%CONDA_PREFIX%/Library/lib/" -::set "BLAS_LIB=openblas" -::set "LAPACK_PATH=%CONDA_PREFIX%/Library/lib/" -::set "LAPACK_LIB=openblas" +set "LAPACK_LIB=mkl_rt" -::set "FFTW3_INC=-I%CONDA_PREFIX%/include/" -::set "FFTW3_PATH=%CONDA_PREFIX%/Library/lib/" -::set "FFTW3_LIB=fftw3" +set "FFTW3_INC=-I%CONDA_PREFIX%/Library/include/fftw" +set "FFTW3_PATH=%CONDA_PREFIX%/Library/lib/" +set "FFTW3_LIB=mkl_rt" ::set "PTHREAD_PATH=%CONDA_PREFIX%/Library/lib/" ::set "PTHREAD_LIB=pthread" @@ -42,20 +38,22 @@ set "BOOST_PATH=%CONDA_PREFIX%\Library\lib\" set "BOOST_LIB=" :: Specify custom compilers if needed -set "CXX=cl /O2 " -set "CC=cl /O2 " +set "CXX=cl /O2" +set "CC=cl /O2" set "OBJDIR=build" set "S4_BINNAME=%OBJDIR%/S4" set "S4_LIBNAME=%OBJDIR%/S4.lib" -set "CPPFLAGS=-EHa -MD -I. -IS4 -IS4/RNP -IS4/kiss_fft" +set "CPPFLAGS=-EHa -LD -MD -DCPU86 -I. -IS4 -IS4/RNP -IS4/kiss_fft" +::enable debug output +::set "CPPFLAGS=%CPPFLAGS% -DENABLE_S4_TRACE" set "CPPFLAGS=%CPPFLAGS% %BOOST_INC%" -set "CPPFLAGS=%CPPFLAGS% -DHAVE_BLAS" -set "CPPFLAGS=%CPPFLAGS% -DHAVE_LAPACK" -::set "CPPFLAGS=%CPPFLAGS% -DHAVE_FFTW3 %FFTW3_INC%" -:: set "CPPFLAGS=%CPPFLAGS% -DHAVE_LIBPTHREAD %PTHREAD_INC%" -set "CPPFLAGS=%CPPFLAGS% -DHAVE_LIBCHOLMOD %CHOLMOD_INC%" +IF DEFINED BLAS_LIB (set "CPPFLAGS=%CPPFLAGS% -DHAVE_BLAS") +IF DEFINED LAPACK_LIB (set "CPPFLAGS=%CPPFLAGS% -DHAVE_LAPACK") +IF DEFINED FFTW3_LIB (set "CPPFLAGS=%CPPFLAGS% -DHAVE_FFTW3 %FFTW3_INC%") +IF DEFINED PTHREAD_LIB (set "CPPFLAGS=%CPPFLAGS% -DHAVE_LIBPTHREAD %PTHREAD_INC%") +IF DEFINED CHOLMOD_LIB (set "CPPFLAGS=%CPPFLAGS% -DHAVE_LIBCHOLMOD %CHOLMOD_INC%") set "LIBS=%BLAS_LIB% %LAPACK_LIB% %FFTW3_LIB% %PTHREAD_LIB% %CHOLMOD_LIB% %BOOST_LIB%" set "LIBPATHS=%BLAS_PATH% %LAPACK_PATH% %FFTW3_PATH% %PTHREAD_PATH% %CHOLMOD_PATH% %BOOST_PATH%" @@ -88,8 +86,8 @@ cd .. %CC% -c %CPPFLAGS% S4/numalloc.c -Fo%OBJDIR%/S4k/numalloc.obj %CC% -c %CPPFLAGS% S4/gsel.c -Fo%OBJDIR%/S4k/gsel.obj %CC% -c %CPPFLAGS% S4/sort.c -Fo%OBJDIR%/S4k/sort.obj -%CC% -c %CPPFLAGS% S4/kiss_fft/kiss_fft.c -Fo%OBJDIR%/S4k/kiss_fft.obj -%CC% -c %CPPFLAGS% S4/kiss_fft/tools/kiss_fftnd.c -Fo%OBJDIR%/S4k/kiss_fftnd.obj +IF DEFINED FFTW3_LIB (echo "using fftw") ELSE (%CC% -c %CPPFLAGS% S4/kiss_fft/kiss_fft.c -Fo%OBJDIR%/S4k/kiss_fft.obj) +IF DEFINED FFTW3_LIB (echo "using fftw") ELSE (%CC% -c %CPPFLAGS% S4/kiss_fft/tools/kiss_fftnd.c -Fo%OBJDIR%/S4k/kiss_fftnd.obj) %CC% -c %CPPFLAGS% S4/SpectrumSampler.c -Fo%OBJDIR%/S4k/SpectrumSampler.obj %CC% -c %CPPFLAGS% S4/cubature.c -Fo%OBJDIR%/S4k/cubature.obj %CC% -c %CPPFLAGS% S4/Interpolator.c -Fo%OBJDIR%/S4k/Interpolator.obj @@ -113,7 +111,11 @@ echo:lib_dirs.extend^([libpath for libpath in '%LIBPATHS%'.split^(^)]^) echo:libs = ['S4'] echo:libs.extend^([lib for lib in '%LIBS%'.split^(^)]^) echo:include_dirs = [np.get_include^(^)] -echo:extra_compile_args = ["-O2"] +echo:extra_compile_args = [] +::fixes the run time mismatch warning +echo:extra_compile_args = ["-LD", "-MD"] +echo:extra_link_args = [] +::echo:extra_link_args = ["/DEBUG", "/verbose:lib"] echo:print^(lib_dirs^) echo:print^(libs^) echo:print^(include_dirs^) @@ -123,7 +125,8 @@ echo: libraries = libs, echo: library_dirs = lib_dirs, echo: include_dirs = include_dirs, echo: extra_objects = ['%S4_LIBNAME%'], -echo: extra_compile_args = extra_compile_args +echo: extra_compile_args = extra_compile_args, +echo: extra_link_args = extra_link_args echo:^) echo: echo:setup^(name = 'S4', From a1b512448c83f75f00b1861d50778d1de2728858 Mon Sep 17 00:00:00 2001 From: marcus-o Date: Thu, 23 Apr 2020 20:14:23 -0400 Subject: [PATCH 10/11] changed win32 macro _win32 to have the right aligned malloc --- S4/main_python.c | 2 +- S4/numalloc.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/S4/main_python.c b/S4/main_python.c index 376f6bbc..0c813c59 100644 --- a/S4/main_python.c +++ b/S4/main_python.c @@ -30,7 +30,7 @@ #include #include -#ifdef WIN32 +#ifdef _WIN32 #define _USE_MATH_DEFINES #endif diff --git a/S4/numalloc.c b/S4/numalloc.c index c753b532..eb1a6bec 100644 --- a/S4/numalloc.c +++ b/S4/numalloc.c @@ -20,7 +20,7 @@ #include #include -#ifdef WIN32 +#ifdef _WIN32 # include void * _aligned_malloc(size_t size, size_t alignment); void _aligned_free(void *ptr); @@ -33,7 +33,7 @@ typedef uintptr_t malloc_aligned_ULONG_PTR; // The actual size of allocation will be greater than this size. // alignment : the alignment boundary void *malloc_aligned(size_t size, size_t alignment){ -#ifdef WIN32 +#ifdef _WIN32 return (void*)_aligned_malloc(size, alignment); #else void *pa, *ptr; @@ -51,7 +51,7 @@ void *malloc_aligned(size_t size, size_t alignment){ } void free_aligned(void *ptr){ -#ifdef WIN32 +#ifdef _WIN32 _aligned_free(ptr); #else if(ptr){ From c334df234e5df3f2cded795c81faebb1c07a9a87 Mon Sep 17 00:00:00 2001 From: marcus-o Date: Sat, 27 Jun 2020 15:40:39 -0400 Subject: [PATCH 11/11] fixed some crash on windows related to mkl zcopy. not sure if bug or wrong use --- S4/S4.cpp | 8 +++++--- S4/fmm/fmm_FFT.cpp | 4 ++-- S4/main_python.c | 2 +- S4/numalloc.c | 4 ++-- S4/rcwa.cpp | 2 +- S4/sort.c | 2 ++ conda_recipe/s4/bld.bat | 26 ++++++++------------------ conda_recipe/s4/meta.yaml | 6 +++--- 8 files changed, 24 insertions(+), 30 deletions(-) diff --git a/S4/S4.cpp b/S4/S4.cpp index d0ac9dfe..6297aa1a 100644 --- a/S4/S4.cpp +++ b/S4/S4.cpp @@ -61,7 +61,7 @@ namespace bs = boost::serialization; void* S4_malloc(size_t size){ // for debugging - void* ret = malloc_aligned(size, 16); + void* ret = malloc_aligned(size, 128); // memset(ret, 0x0, size); return ret; } @@ -2939,7 +2939,8 @@ int Simulation_GetField(Simulation *S, const double r[3], double fE[6], double f } std::complex *work = ab + n4; - RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1); + memcpy(ab, Lsoln->ab, sizeof(std::complex) * n4); + //RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1); //RNP::IO::PrintVector(n4, ab, 1); TranslateAmplitudes(S->n_G, Lbands->q, L->thickness, dz, ab); std::complex efield[3], hfield[3]; @@ -3051,7 +3052,8 @@ int Simulation_GetFieldPlane(Simulation *S, int nxy[2], double zz, double *E, do } std::complex *work = ab + n4; - RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1); + memcpy(ab, Lsoln->ab, sizeof(std::complex) * n4); + //RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1); //RNP::IO::PrintVector(n4, ab, 1); TranslateAmplitudes(S->n_G, Lbands->q, L->thickness, dz, ab); size_t snxy[2] = { nxy[0], nxy[1] }; diff --git a/S4/fmm/fmm_FFT.cpp b/S4/fmm/fmm_FFT.cpp index e54e0b8f..c9ee7388 100644 --- a/S4/fmm/fmm_FFT.cpp +++ b/S4/fmm/fmm_FFT.cpp @@ -32,8 +32,8 @@ #include "fmm.h" #include -#include -#include +//#include +//#include #include "fft_iface.h" int FMMGetEpsilon_FFT(const Simulation *S, const Layer *L, const int n, std::complex *Epsilon2, std::complex *Epsilon_inv){ diff --git a/S4/main_python.c b/S4/main_python.c index 0c813c59..8f9a366a 100644 --- a/S4/main_python.c +++ b/S4/main_python.c @@ -2169,7 +2169,7 @@ static PyObject *S4_NewInterpolator(PyObject *self, PyObject *args, PyObject *kw static PyObject *S4_SolveInParallel(PyObject *Self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"Layer", "Simulations", NULL}; - const char *layerName; + //const char *layerName; //S4_solve_in Py_RETURN_NONE; } diff --git a/S4/numalloc.c b/S4/numalloc.c index eb1a6bec..94bc9cc5 100644 --- a/S4/numalloc.c +++ b/S4/numalloc.c @@ -22,8 +22,8 @@ #ifdef _WIN32 # include -void * _aligned_malloc(size_t size, size_t alignment); -void _aligned_free(void *ptr); +// void * _aligned_malloc(size_t size, size_t alignment); +// void _aligned_free(void *ptr); #else #include typedef uintptr_t malloc_aligned_ULONG_PTR; diff --git a/S4/rcwa.cpp b/S4/rcwa.cpp index 1c7a8f8d..7c9ce5ce 100644 --- a/S4/rcwa.cpp +++ b/S4/rcwa.cpp @@ -50,7 +50,7 @@ #include static inline void* rcwa_malloc(size_t size){ - void *ret = malloc_aligned(size, 16); + void *ret = malloc_aligned(size, 128); //memset(ret, 0x0, size); return ret; } diff --git a/S4/sort.c b/S4/sort.c index 00b0c369..a82dc5ae 100644 --- a/S4/sort.c +++ b/S4/sort.c @@ -209,7 +209,9 @@ sort (void *const pbase, size_t total_elems, size_t size, of the array to sort, and END_PTR points at the very last element in the array (*not* one beyond it!). */ +#ifndef _MSC_VER #define min(x, y) ((x) < (y) ? (x) : (y)) +#endif { char *const end_ptr = &base_ptr[size * (total_elems - 1)]; diff --git a/conda_recipe/s4/bld.bat b/conda_recipe/s4/bld.bat index 75fb11f9..7dd00058 100644 --- a/conda_recipe/s4/bld.bat +++ b/conda_recipe/s4/bld.bat @@ -1,4 +1,4 @@ -@echo off +@echo on setlocal EnableDelayedExpansion @@ -16,6 +16,7 @@ setlocal EnableDelayedExpansion set DISTUTILS_USE_SDK=1 ::CALL "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 CALL "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build/vcvarsall.bat" amd64 -vcvars_ver=14.0 +@echo on set "BLAS_PATH=%CONDA_PREFIX%/Library/lib/" set "BLAS_LIB=mkl_rt" @@ -38,14 +39,13 @@ set "BOOST_PATH=%CONDA_PREFIX%\Library\lib\" set "BOOST_LIB=" :: Specify custom compilers if needed -set "CXX=cl /O2" -set "CC=cl /O2" +set "CXX=cl" +set "CC=cl" set "OBJDIR=build" -set "S4_BINNAME=%OBJDIR%/S4" set "S4_LIBNAME=%OBJDIR%/S4.lib" -set "CPPFLAGS=-EHa -LD -MD -DCPU86 -I. -IS4 -IS4/RNP -IS4/kiss_fft" +set "CPPFLAGS=-O2 -W2 -EHs -LD -MD -DCPU86 -I. -IS4 -IS4/RNP -IS4/kiss_fft" ::enable debug output ::set "CPPFLAGS=%CPPFLAGS% -DENABLE_S4_TRACE" set "CPPFLAGS=%CPPFLAGS% %BOOST_INC%" @@ -59,15 +59,10 @@ set "LIBS=%BLAS_LIB% %LAPACK_LIB% %FFTW3_LIB% %PTHREAD_LIB% %CHOLMOD_LIB% %BOOST set "LIBPATHS=%BLAS_PATH% %LAPACK_PATH% %FFTW3_PATH% %PTHREAD_PATH% %CHOLMOD_PATH% %BOOST_PATH%" :: Make a build folder and change to it. -::cd S4 -::cd S4 rmdir /Q /s %OBJDIR% -:: mkdir %OBJDIR% cd %OBJDIR% mkdir S4k -mkdir S4r -mkdir modules cd .. %CXX% -c %CPPFLAGS% S4/S4.cpp -Fo%OBJDIR%/S4k/S4.obj %CXX% -c %CPPFLAGS% S4/rcwa.cpp -Fo%OBJDIR%/S4k/rcwa.obj @@ -89,12 +84,11 @@ cd .. IF DEFINED FFTW3_LIB (echo "using fftw") ELSE (%CC% -c %CPPFLAGS% S4/kiss_fft/kiss_fft.c -Fo%OBJDIR%/S4k/kiss_fft.obj) IF DEFINED FFTW3_LIB (echo "using fftw") ELSE (%CC% -c %CPPFLAGS% S4/kiss_fft/tools/kiss_fftnd.c -Fo%OBJDIR%/S4k/kiss_fftnd.obj) %CC% -c %CPPFLAGS% S4/SpectrumSampler.c -Fo%OBJDIR%/S4k/SpectrumSampler.obj -%CC% -c %CPPFLAGS% S4/cubature.c -Fo%OBJDIR%/S4k/cubature.obj +::%CC% -c %CPPFLAGS% S4/cubature.c -Fo%OBJDIR%/S4k/cubature.obj %CC% -c %CPPFLAGS% S4/Interpolator.c -Fo%OBJDIR%/S4k/Interpolator.obj -%CC% -c %CPPFLAGS% S4/convert.c -Fo%OBJDIR%/S4k/convert.obj +::%CC% -c %CPPFLAGS% S4/convert.c -Fo%OBJDIR%/S4k/convert.obj IF DEFINED LAPACK_LIB (echo "using lapack") ELSE (%CXX% -c %CPPFLAGS% S4/RNP/Eigensystems.cpp -Fo%OBJDIR%/S4k/Eigensystems.obj) -@echo on lib.exe %OBJDIR%/S4k/* /out:%S4_LIBNAME% @@ -111,8 +105,6 @@ echo:lib_dirs.extend^([libpath for libpath in '%LIBPATHS%'.split^(^)]^) echo:libs = ['S4'] echo:libs.extend^([lib for lib in '%LIBS%'.split^(^)]^) echo:include_dirs = [np.get_include^(^)] -echo:extra_compile_args = [] -::fixes the run time mismatch warning echo:extra_compile_args = ["-LD", "-MD"] echo:extra_link_args = [] ::echo:extra_link_args = ["/DEBUG", "/verbose:lib"] @@ -135,6 +127,4 @@ echo: description = 'Stanford Stratified Structure Solver ^(S4^): Fourier Modal echo: ext_modules = [S4module] echo:^))>setup.py -IF DEFINED PYTHON ("%PYTHON%" setup.py install) ELSE (python setup.py install) - - +IF DEFINED PYTHON ("%PYTHON%" setup.py install) ELSE (python setup.py install) \ No newline at end of file diff --git a/conda_recipe/s4/meta.yaml b/conda_recipe/s4/meta.yaml index 2ab7efdc..f6dd4fd6 100644 --- a/conda_recipe/s4/meta.yaml +++ b/conda_recipe/s4/meta.yaml @@ -7,7 +7,7 @@ source: path: ../../ build: - number: 2 + number: 5 requirements: build: @@ -32,7 +32,7 @@ requirements: run: - python - {{ pin_compatible('numpy') }} -# - {{ pin_compatible('boost') }} + - {{ pin_compatible('libboost') }} - {{ pin_compatible('suitesparse') }} - {{ pin_compatible('mkl') }} [win] - {{ pin_compatible('openblas') }} [unix] @@ -64,4 +64,4 @@ about: extra: recipe-maintainers: - - marcus-o + - marcus-o \ No newline at end of file