Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Detailed installation instructions (64-bit Ubuntu 16 or 18):
# Installation instructions:
In principle you should be able to install this on Windows / OS X / Linux using conda and 'conda install -c marcus-o s4'.
Install in a new environment to avoid incompatibilities.
I don't have a large test base, so I would appreciate it if you let me know whether it works or not (marcus.ossiander at gmail); if it doesn't, I'll try to help.

# Detailed installation instructions (64-bit Ubuntu 16 or 18):
## Key steps:

```
Expand Down
4 changes: 4 additions & 0 deletions S4/RNP/Eigensystems.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#ifndef _RNP_EIGENSYSTEMS_H_
#define _RNP_EIGENSYSTEMS_H_

#ifdef _MSC_VER
#include <algorithm>
#endif

#include <cstddef>
#include <complex>

Expand Down
8 changes: 5 additions & 3 deletions S4/S4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ namespace bs = boost::serialization;


// Allocation wrapper: requests `size` bytes from malloc_aligned and returns
// the resulting pointer unchanged. The zero-fill memset below is commented out.
// NOTE(review): this text is a diff rendering, so two `ret` declarations appear
// (alignment 16 and alignment 128); presumably the first is the removed line
// and the second its replacement -- confirm against the real file.
void* S4_malloc(size_t size){ // for debugging
void* ret = malloc_aligned(size, 16);
void* ret = malloc_aligned(size, 128);
// memset(ret, 0x0, size);
return ret;
}
Expand Down Expand Up @@ -2939,7 +2939,8 @@ int Simulation_GetField(Simulation *S, const double r[3], double fE[6], double f
}
std::complex<double> *work = ab + n4;

RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1);
memcpy(ab, Lsoln->ab, sizeof(std::complex<double>) * n4);
//RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1);
//RNP::IO::PrintVector(n4, ab, 1);
TranslateAmplitudes(S->n_G, Lbands->q, L->thickness, dz, ab);
std::complex<double> efield[3], hfield[3];
Expand Down Expand Up @@ -3051,7 +3052,8 @@ int Simulation_GetFieldPlane(Simulation *S, int nxy[2], double zz, double *E, do
}
std::complex<double> *work = ab + n4;

RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1);
memcpy(ab, Lsoln->ab, sizeof(std::complex<double>) * n4);
//RNP::TBLAS::Copy(n4, Lsoln->ab,1, ab,1);
//RNP::IO::PrintVector(n4, ab, 1);
TranslateAmplitudes(S->n_G, Lbands->q, L->thickness, dz, ab);
size_t snxy[2] = { nxy[0], nxy[1] };
Expand Down
4 changes: 4 additions & 0 deletions S4/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@
#define PACKAGE_URL ""
#define PACKAGE_VERSION "1.1.1"
#define VERSION "1.1.1"

/* When _MSC_VER is defined, map strcasecmp onto _stricmp.
   Presumably needed because the MSVC runtime does not provide strcasecmp -- TODO confirm. */
#ifdef _MSC_VER
#define strcasecmp _stricmp
#endif
16 changes: 8 additions & 8 deletions S4/fmm/fft_iface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "fft_iface.h"
#include <cstdlib>

#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
#include <fftw3.h>
#else
#include <kiss_fft.h>
Expand All @@ -46,23 +46,23 @@ int fft_next_fast_size(int n){
}

// Allocates storage for `n` std::complex<double> values with the allocator of
// the FFT backend chosen at compile time: fftw_malloc when the FFTW feature
// macro is defined, KISS_FFT_MALLOC otherwise. Returns the raw allocator
// result cast to std::complex<double>*; no null check is performed here.
// NOTE(review): two #ifdef lines appear because this is a diff rendering;
// presumably HAVE_LIBFFTW3 is the removed spelling and HAVE_FFTW3 the new one -- confirm.
std::complex<double> *fft_alloc_complex(size_t n){
#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
return (std::complex<double>*)(fftw_complex*)fftw_malloc(sizeof(fftw_complex) * n);
#else
return (std::complex<double>*)KISS_FFT_MALLOC(sizeof(std::complex<double>) * n);
#endif
}

// Releases `p` with the deallocator matching the compile-time FFT backend:
// fftw_free when the FFTW feature macro is defined, KISS_FFT_FREE otherwise.
// NOTE(review): two #ifdef lines appear because this is a diff rendering;
// presumably HAVE_LIBFFTW3 is the removed spelling and HAVE_FFTW3 the new one -- confirm.
void fft_free(void *p){
#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
fftw_free(p);
#else
KISS_FFT_FREE(p);
#endif
}

struct tag_fft_plan{
#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
fftw_plan plan;
#else
kiss_fftnd_cfg cfg;
Expand All @@ -76,7 +76,7 @@ fft_plan fft_plan_dft_2d(
int sign
){
fft_plan plan = NULL;
#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
# ifdef HAVE_LIBPTHREAD
pthread_mutex_lock(&mutex);
# endif
Expand All @@ -103,7 +103,7 @@ fft_plan fft_plan_dft_2d(
}

void fft_plan_exec(const fft_plan plan){
#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
fftw_execute(plan->plan);
#else
kiss_fftnd(plan->cfg, (const kiss_fft_cpx *)plan->in, (kiss_fft_cpx *)plan->out);
Expand All @@ -112,7 +112,7 @@ void fft_plan_exec(const fft_plan plan){

void fft_plan_destroy(fft_plan plan){
if(NULL == plan){ return; }
#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
# ifdef HAVE_LIBPTHREAD
pthread_mutex_lock(&mutex);
# endif
Expand All @@ -135,7 +135,7 @@ void fft_init(){
}

void fft_destroy(){
#ifdef HAVE_LIBFFTW3
#ifdef HAVE_FFTW3
fftw_cleanup();
#endif
#ifdef HAVE_LIBPTHREAD
Expand Down
4 changes: 2 additions & 2 deletions S4/fmm/fmm_FFT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
#include "fmm.h"

#include <limits>
#include <kiss_fft.h>
#include <tools/kiss_fftnd.h>
//#include <kiss_fft.h>
//#include <tools/kiss_fftnd.h>
#include "fft_iface.h"

int FMMGetEpsilon_FFT(const Simulation *S, const Layer *L, const int n, std::complex<double> *Epsilon2, std::complex<double> *Epsilon_inv){
Expand Down
1 change: 1 addition & 0 deletions S4/fmm/fmm_PolBasisJones.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <config.h>

#define _USE_MATH_DEFINES
#include <cmath>
#include <S4.h>
#include "../RNP/TBLAS.h"
Expand Down
2 changes: 1 addition & 1 deletion S4/fmm/fmm_PolBasisVL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ int FMMGetEpsilon_PolBasisVL(const Simulation *S, const Layer *L, const int n, s
}


fft_plan plan = fft_plan_dft_2d(ngrid, Ffrom, Fto, -1);
fft_plan plan = fft_plan_dft_2d(ngrid, Ffrom, Fto, 1);

// We fill in the quarter blocks of F in Fortran order
for(int w = 0; w < 4; ++w){
Expand Down
4 changes: 2 additions & 2 deletions S4/main_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include <string.h>
#include <stdarg.h>

#ifdef WIN32
#ifdef _WIN32
#define _USE_MATH_DEFINES
#endif

Expand Down Expand Up @@ -2169,7 +2169,7 @@ static PyObject *S4_NewInterpolator(PyObject *self, PyObject *args, PyObject *kw
/* Stub: declares the keyword list {"Layer", "Simulations"} but never parses
   `args`/`kwds` and performs no work; it always returns Python None.
   NOTE(review): the `layerName` declaration appears both live and commented
   out because this is a diff rendering -- presumably the commented-out form
   is the new line; confirm against the real file. */
static PyObject *S4_SolveInParallel(PyObject *Self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"Layer", "Simulations", NULL};
const char *layerName;
//const char *layerName;
//S4_solve_in
Py_RETURN_NONE;
}
Expand Down
10 changes: 5 additions & 5 deletions S4/numalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
#include <stdlib.h>
#include <stdio.h>

#ifdef WIN32
#ifdef _WIN32
# include <malloc.h>
void * _aligned_malloc(size_t size, size_t alignment);
void _aligned_free(void *ptr);
// void * _aligned_malloc(size_t size, size_t alignment);
// void _aligned_free(void *ptr);
#else
#include <inttypes.h>
typedef uintptr_t malloc_aligned_ULONG_PTR;
Expand All @@ -33,7 +33,7 @@ typedef uintptr_t malloc_aligned_ULONG_PTR;
// The actual size of allocation will be greater than this size.
// alignment : the alignment boundary
void *malloc_aligned(size_t size, size_t alignment){
#ifdef WIN32
#ifdef _WIN32
return (void*)_aligned_malloc(size, alignment);
#else
void *pa, *ptr;
Expand All @@ -51,7 +51,7 @@ void *malloc_aligned(size_t size, size_t alignment){
}

void free_aligned(void *ptr){
#ifdef WIN32
#ifdef _WIN32
_aligned_free(ptr);
#else
if(ptr){
Expand Down
2 changes: 2 additions & 0 deletions S4/pattern/predicates.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifndef _MSC_VER
#include <sys/time.h>
#endif

/* On some machines, the exact arithmetic routines might be defeated by the */
/* use of internal extended precision floating-point registers. Sometimes */
Expand Down
2 changes: 1 addition & 1 deletion S4/rcwa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
#include <numalloc.h>

// File-local allocation helper: requests `size` bytes from malloc_aligned and
// returns the pointer unchanged. The zero-fill memset below is commented out.
// NOTE(review): this text is a diff rendering, so two `ret` declarations appear
// (alignment 16 and alignment 128); presumably the first is the removed line
// and the second its replacement -- confirm against the real file.
static inline void* rcwa_malloc(size_t size){
void *ret = malloc_aligned(size, 16);
void *ret = malloc_aligned(size, 128);
//memset(ret, 0x0, size);
return ret;
}
Expand Down
2 changes: 2 additions & 0 deletions S4/sort.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ sort (void *const pbase, size_t total_elems, size_t size,
of the array to sort, and END_PTR points at the very last element in
the array (*not* one beyond it!). */

#ifndef _MSC_VER
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif

{
char *const end_ptr = &base_ptr[size * (total_elems - 1)];
Expand Down
Loading