From f1345cceca4e7689013447c521825afc37cdca51 Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Mon, 31 Oct 2016 00:43:01 +0200 Subject: [PATCH 01/10] Implement CIC DDC for 16-bit integer real signal --- csdr.c | 39 +++++++++++++++++++++++++ libcsdr.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ libcsdr.h | 5 ++++ 3 files changed, 129 insertions(+) diff --git a/csdr.c b/csdr.c index c6f8acf6..033c3a74 100644 --- a/csdr.c +++ b/csdr.c @@ -2036,6 +2036,45 @@ int main(int argc, char *argv[]) } } + if(!strcmp(argv[1],"cicddc_s16_c")) { + float rate=0; + int fd; + int factor=0, insize, outsize; + + if(argc<=2) return badsyntax("need required parameter(s) (decimation factor, [rate])"); + sscanf(argv[2],"%d",&factor); + if(fd=init_fifo(argc,argv)) + { + while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); + } + else + { + if(argc<=3) return badsyntax("need required parameters (decimation factor, rate)"); + sscanf(argv[3],"%g",&rate); + } + + the_bufsize = getbufsize(); + outsize = the_bufsize / factor; + insize = outsize * factor; // make it integer multiple of factor + sendbufsize(outsize); + + int16_t *input_buffer = malloc(sizeof(int16_t) * insize); + complexf *output_buffer = malloc(sizeof(complexf) * outsize); + + void *state = cicddc_init(factor); + for(;;) + { + FEOF_CHECK; + fread(input_buffer, sizeof(int16_t), insize, stdin); + cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); + fwrite(output_buffer, sizeof(complexf), outsize, stdout); + fflush(stdout); + if(read_fifo_ctl(fd,"%g\n",&rate)) break; + TRY_YIELD; + } + cicddc_free(state); + } + if(!strcmp(argv[1],"none")) { return 0; diff --git a/libcsdr.c b/libcsdr.c index e6c22cc3..dc368016 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -525,6 +525,91 @@ void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps } + +/* CIC DDC */ +#define SINESIZE 0x1000 +typedef int64_t cic_dt; // data type used for integrators and combs +typedef struct { + int factor; + uint64_t phase; + float gain; + cic_dt ig0a, ig0b, ig1a, ig1b; + cic_dt comb0a, comb0b, comb1a, comb1b; + int16_t *sinetable; +} cicddc_t; + +void *cicddc_init(int factor) { + int i; + int sinesize2 = SINESIZE * 5/4; // 25% extra to get cosine from the same table + cicddc_t *s; + s = malloc(sizeof(cicddc_t)); + memset(s, 0, sizeof(cicddc_t)); + + float sineamp = 32767.0f; + s->factor = factor; + s->gain = 1.0f / SHRT_MAX / sineamp / factor / factor / factor; // compensate for gain of 3 integrators + + s->sinetable = malloc(SINESIZE * 5/4 * sizeof(*s->sinetable)); + double f = 2.0*M_PI / (double)SINESIZE; + for(i = 0; i < sinesize2; i++) { + s->sinetable[i] = sineamp * cos(f * i); + } + return s; +} + +void cicddc_free(void *state) { + cicddc_t *s = state; + free(s->sinetable); + free(s); +} + +void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, float rate) { + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) / M_PI); + + int16_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int sinep = phase >> (64-12); + in_a = (cic_dt)inp[i] * (cic_dt)sinetable[sinep]; + in_b = (cic_dt)inp[i] * (cic_dt)sinetable[sinep + (SINESIZE/4)]; + phase += freq; + // integrators: + ig0a += in_a; ig0b += in_b; + ig1a += ig0a; ig1b += ig0b; + ig2a += ig1a; ig2b += ig1b; + } + inp += factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; +} + + /* __ __ _ _ _ _ /\ | \/ | | | | | | | | | diff --git a/libcsdr.h b/libcsdr.h index 28ce66e6..bdf5aee0 100644 --- a/libcsdr.h +++ b/libcsdr.h @@ -29,6 +29,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once +#include #define MIN_M(x,y) (((x)>(y))?(y):(x)) /* @@ -186,3 +187,7 @@ void convert_s24_f(unsigned char* input, float* output, int input_size, int bige int is_nan(float f); + +void *cicddc_init(int factor); +void cicddc_free(void *state); +void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, float rate); From 0ca94656a265c1b74962108501952357da04d4cb Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Mon, 31 Oct 2016 19:39:25 +0200 Subject: [PATCH 02/10] Optimize CIC DDC --- libcsdr.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/libcsdr.c b/libcsdr.c index dc368016..67e28c84 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -527,7 +527,8 @@ void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps /* CIC DDC */ -#define SINESIZE 0x1000 +#define SINESHIFT 12 +#define SINESIZE (1<factor = factor; s->gain = 1.0f / SHRT_MAX / sineamp / factor / factor / factor; // compensate for gain of 3 integrators - s->sinetable = malloc(SINESIZE * 5/4 * sizeof(*s->sinetable)); + s->sinetable = malloc(sinesize2 * sizeof(*s->sinetable)); double f = 2.0*M_PI / (double)SINESIZE; for(i = 0; i < sinesize2; i++) { s->sinetable[i] = sineamp * cos(f * i); @@ -582,14 +583,17 @@ void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, fl cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum for(i = 0; i < factor; i++) { cic_dt in_a, in_b; - int sinep = phase >> (64-12); - in_a = (cic_dt)inp[i] * (cic_dt)sinetable[sinep]; - in_b = (cic_dt)inp[i] * (cic_dt)sinetable[sinep + (SINESIZE/4)]; + int sinep = phase >> (64-SINESHIFT); + in_a = (int32_t)inp[i] * (int32_t)sinetable[sinep]; + in_b = (int32_t)inp[i] * (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; phase += freq; - // integrators: - ig0a += in_a; ig0b += in_b; - ig1a += ig0a; ig1b += ig0b; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; } inp += factor; // comb filters: From a95008be7f15018f4182fc3bfdd4a1ac6c9326af Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Tue, 1 Nov 2016 00:56:30 +0200 Subject: [PATCH 03/10] Fixed reading from fifo --- csdr.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/csdr.c b/csdr.c index 033c3a74..1b39a092 100644 --- a/csdr.c +++ b/csdr.c @@ -177,13 +177,16 @@ int init_fifo(int argc, char *argv[]) { if(argc>=4) { - if(!strcmp(argv[2],"--fifo")) - { - fprintf(stderr,"csdr: fifo control mode on\n"); - int fd = open(argv[3], O_RDONLY); - int flags = fcntl(fd, F_GETFL, 0); - fcntl(fd, F_SETFL, flags | O_NONBLOCK); - return fd; + int i; + for(i = 2; i < argc-1; i++) { + if(!strcmp(argv[i],"--fifo")) + { + fprintf(stderr,"csdr: fifo control mode on\n"); + int fd = open(argv[i+1], O_RDONLY); + int flags = fcntl(fd, F_GETFL, 0); + fcntl(fd, F_SETFL, flags | O_NONBLOCK); + return fd; + } } } return 0; @@ -2069,7 +2072,7 @@ int main(int argc, char *argv[]) cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); fwrite(output_buffer, sizeof(complexf), outsize, stdout); fflush(stdout); - if(read_fifo_ctl(fd,"%g\n",&rate)) break; + read_fifo_ctl(fd,"%g\n",&rate); TRY_YIELD; } cicddc_free(state); From c05128044255a4c639ca6a27705fa110697cbd66 Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Tue, 1 Nov 2016 01:52:53 +0200 Subject: [PATCH 04/10] Fixed bug to get ddc on correct frequency --- libcsdr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcsdr.c b/libcsdr.c index 67e28c84..a6343b63 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -574,7 +574,7 @@ void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, fl int16_t *sinetable = s->sinetable; float gain = s->gain; - freq = rate * ((float)(1ULL << 63) / M_PI); + freq = rate * ((float)(1ULL << 63) * 2); int16_t *inp = input; for(k = 0; k < outsize; k++) { @@ -584,8 +584,8 @@ void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, fl for(i = 0; i < factor; i++) { cic_dt in_a, in_b; int sinep = phase >> (64-SINESHIFT); - in_a = (int32_t)inp[i] * (int32_t)sinetable[sinep]; - in_b = (int32_t)inp[i] * (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + in_a = (int32_t)inp[i] * (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + in_b = (int32_t)inp[i] * (int32_t)sinetable[sinep]; phase += freq; /* integrators: The calculations are ordered so that each integrator From 2ae2ae9907f7bd74b85fa1623f460ef32a325cbd Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Tue, 2 May 2017 23:23:25 +0300 Subject: [PATCH 05/10] Added CIC DDC for complex input signal and option to directly read from SHM buffer --- csdr.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- libcsdr.c | 55 +++++++++++++++++++++++++++++++++ libcsdr.h | 1 + 3 files changed, 145 insertions(+), 2 deletions(-) diff --git a/csdr.c b/csdr.c index 1b39a092..d60445c3 100644 --- a/csdr.c +++ b/csdr.c @@ -49,6 +49,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include +#include char usage[]= "csdr - a simple commandline tool for Software Defined Radio receiver DSP.\n\n" @@ -2039,10 +2042,12 @@ int main(int argc, char *argv[]) } } - if(!strcmp(argv[1],"cicddc_s16_c")) { + int complex_cic; + if((!strcmp(argv[1],"cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"cicddc_cs16_c"))) { float rate=0; int fd; int factor=0, insize, outsize; + bigbufs = 1; if(argc<=2) return badsyntax("need required parameter(s) (decimation factor, [rate])"); sscanf(argv[2],"%d",&factor); @@ -2060,6 +2065,7 @@ int main(int argc, char *argv[]) outsize = the_bufsize / factor; insize = outsize * factor; // make it integer multiple of factor sendbufsize(outsize); + if(complex_cic) insize *= 2; int16_t *input_buffer = malloc(sizeof(int16_t) * insize); complexf *output_buffer = malloc(sizeof(complexf) * outsize); @@ -2069,7 +2075,10 @@ int main(int argc, char *argv[]) { FEOF_CHECK; fread(input_buffer, sizeof(int16_t), insize, stdin); - cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); + if(complex_cic) + cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); + else + cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); fwrite(output_buffer, sizeof(complexf), outsize, stdout); fflush(stdout); read_fifo_ctl(fd,"%g\n",&rate); @@ -2078,6 +2087,84 @@ int main(int argc, char *argv[]) cicddc_free(state); } + if((!strcmp(argv[1],"shm_cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"shm_cicddc_cs16_c"))) { + float rate=0; + int fd, shm_fd; + int factor=0, insize, outsize; + //bigbufs = 1; + + if(argc<=3) return badsyntax("need required parameter(s) (shm name, decimation factor, [rate])"); + sscanf(argv[3],"%d",&factor); + if(fd=init_fifo(argc,argv)) + { + while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); + } + else + { + if(argc<=4) return badsyntax("need required parameters (shm name, decimation factor, rate)"); + sscanf(argv[4],"%g",&rate); + } + + // some code from shmread: + void *shm_buf; + size_t *shm_p; + size_t readpoint_bufs = 0, writepoint_bufs = 0, bufsize_bufs; + size_t shm_size, bufsize_bytes, insize_bytes, prevp; + struct stat shm_stat; + shm_fd = shm_open(argv[2], O_RDONLY, 0644); + if(shm_fd < 0) { perror("shm_open failed"); return 1; } + if(fstat(shm_fd, &shm_stat) < 0) { perror("fstat failed"); return 1; } + shm_size = shm_stat.st_size; + bufsize_bytes = shm_size - sizeof(size_t); + shm_buf = mmap(0, shm_size, PROT_READ, MAP_SHARED, shm_fd, 0); + if(shm_buf == MAP_FAILED) { perror("mmap failed"); return 1; } + shm_p = shm_buf + bufsize_bytes; + prevp = *shm_p; + if(prevp >= bufsize_bytes) return badsyntax("bad pointer value in shm buffer"); + + outsize = 0x4000; + insize = outsize * factor; // make it integer multiple of factor + sendbufsize(outsize); + if(complex_cic) insize *= 2; + + insize_bytes = sizeof(int16_t) * insize; + /* We don't need modulo or memory mapping tricks if wrap-around occurs at input block boundary. + This needs the SHM buffer size to be a multiple of insize. */ + if(bufsize_bytes % insize_bytes != 0) return badsyntax("SHM size should be multiple of CIC processing block size"); + + bufsize_bufs = bufsize_bytes / insize_bytes; + writepoint_bufs = readpoint_bufs = prevp / insize_bytes; + + int16_t *input_buffer /*= malloc(sizeof(int16_t) * insize)*/; + complexf *output_buffer = malloc(sizeof(complexf) * outsize); + + void *state = cicddc_init(factor); + for(;;) + { + fprintf(stderr, "%d %d\n", writepoint_bufs, readpoint_bufs); + if(writepoint_bufs != readpoint_bufs) { + //memcpy(input_buffer, shm_buf + insize_bytes * readpoint_bufs, insize_bytes); + input_buffer = (int16_t*)(shm_buf + insize_bytes * readpoint_bufs); + if(complex_cic) + cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); + else + cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); + fwrite(output_buffer, sizeof(complexf), outsize, stdout); + fflush(stdout); + + // advance pointer: + readpoint_bufs++; + if(readpoint_bufs >= bufsize_bufs) readpoint_bufs = 0; + } else { + usleep(50000); + read_fifo_ctl(fd,"%g\n",&rate); + writepoint_bufs = (*shm_p) / insize_bytes; + } + /*TRY_YIELD;*/ + } + cicddc_free(state); + } + if(!strcmp(argv[1],"none")) { return 0; diff --git a/libcsdr.c b/libcsdr.c index a6343b63..87b7f87f 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -613,6 +613,61 @@ void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, fl s->phase = phase; } +void cicddc_cs16_c(void *state, int16_t *input, complexf *output, int outsize, float rate) { + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + int16_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int32_t m_a, m_b, m_c, m_d; + int sinep = phase >> (64-SINESHIFT); + m_a = inp[2*i]; + m_b = inp[2*i+1]; + m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + m_d = (int32_t)sinetable[sinep]; + // complex multiplication: + in_a = m_a*m_c - m_b*m_d; + in_b = m_a*m_d + m_b*m_c; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += 2*factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; +} + /* __ __ _ _ _ _ diff --git a/libcsdr.h b/libcsdr.h index bdf5aee0..6604c54b 100644 --- a/libcsdr.h +++ b/libcsdr.h @@ -191,3 +191,4 @@ int is_nan(float f); void *cicddc_init(int factor); void cicddc_free(void *state); void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, float rate); +void cicddc_cs16_c(void *state, int16_t *input, complexf *output, int outsize, float rate); From 1a28b46ea1c8f8a6203324bb4587b0670fea25c8 Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Wed, 3 May 2017 02:01:43 +0300 Subject: [PATCH 06/10] Some optimization of SHM+DDC and support for delaying the signal --- csdr.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/csdr.c b/csdr.c index d60445c3..c4b8ab80 100644 --- a/csdr.c +++ b/csdr.c @@ -2122,7 +2122,7 @@ int main(int argc, char *argv[]) prevp = *shm_p; if(prevp >= bufsize_bytes) return badsyntax("bad pointer value in shm buffer"); - outsize = 0x4000; + outsize = 0x400; insize = outsize * factor; // make it integer multiple of factor sendbufsize(outsize); if(complex_cic) insize *= 2; @@ -2135,29 +2135,43 @@ int main(int argc, char *argv[]) bufsize_bufs = bufsize_bytes / insize_bytes; writepoint_bufs = readpoint_bufs = prevp / insize_bytes; - int16_t *input_buffer /*= malloc(sizeof(int16_t) * insize)*/; + int16_t *input_buffer = malloc(sizeof(int16_t) * insize); complexf *output_buffer = malloc(sizeof(complexf) * outsize); void *state = cicddc_init(factor); + size_t extradelay = 0; for(;;) { - fprintf(stderr, "%d %d\n", writepoint_bufs, readpoint_bufs); + //fprintf(stderr, "%d %d\n", writepoint_bufs, readpoint_bufs); if(writepoint_bufs != readpoint_bufs) { - //memcpy(input_buffer, shm_buf + insize_bytes * readpoint_bufs, insize_bytes); - input_buffer = (int16_t*)(shm_buf + insize_bytes * readpoint_bufs); + int r; + ssize_t readpoint_bufs1 = (ssize_t)readpoint_bufs - extradelay; + while(readpoint_bufs1 < 0) readpoint_bufs1 += bufsize_bufs; + + //input_buffer = (int16_t*)(shm_buf + insize_bytes * readpoint_bufs1); + + /* It seems strange but memcpying small blocks and reading from there + is faster than reading directly from the SHM buffer during processing + (at least on the server I tested it on). */ + memcpy(input_buffer, shm_buf + insize_bytes * readpoint_bufs1, insize_bytes); + if(complex_cic) cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); else cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); fwrite(output_buffer, sizeof(complexf), outsize, stdout); - fflush(stdout); + //fflush(stdout); // advance pointer: readpoint_bufs++; if(readpoint_bufs >= bufsize_bufs) readpoint_bufs = 0; } else { + ssize_t newdelay = -1; usleep(50000); - read_fifo_ctl(fd,"%g\n",&rate); + read_fifo_ctl(fd,"%g %zd\n",&rate, &newdelay); + if(newdelay >= 0 && newdelay < bufsize_bytes) + extradelay = newdelay / insize_bytes; + writepoint_bufs = (*shm_p) / insize_bytes; } /*TRY_YIELD;*/ From dd1dd09f7a370abeed1fff2a97a6f13cb831b43c Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Wed, 3 May 2017 22:46:12 +0300 Subject: [PATCH 07/10] Allow combining FFT and averaging in one process for reduced pipe overhead --- csdr.c | 65 +++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/csdr.c b/csdr.c index c4b8ab80..595e68b8 100644 --- a/csdr.c +++ b/csdr.c @@ -1293,7 +1293,8 @@ int main(int argc, char *argv[]) } } - if(!strcmp(argv[1],"fft_cc")) + int combined_logavg; + if((!strcmp(argv[1],"fft_cc")) || (combined_logavg = !strcmp(argv[1],"fft_cc_logavg")) ) { if(argc<=3) return badsyntax("need required parameters (fft_size, out_of_every_n_samples)"); int fft_size; @@ -1304,19 +1305,35 @@ int main(int argc, char *argv[]) int benchmark=0; int octave=0; window_t window = WINDOW_DEFAULT; - if(argc>=5) - { - window=firdes_get_window_from_string(argv[4]); - } - if(argc>=6) - { - benchmark|=!strcmp("--benchmark",argv[5]); - octave|=!strcmp("--octave",argv[5]); - } - if(argc>=7) - { - benchmark|=!strcmp("--benchmark",argv[6]); - octave|=!strcmp("--octave",argv[6]); + + // for averaging: + float add_db=0; + int avgnumber=0, n_averaged=0; + float *output2; + if(combined_logavg) { + if(argc<=5) return badsyntax("need required parameters (fft_size, out_of_every_n_samples, add_db, avgnumber)"); + + sscanf(argv[4],"%g",&add_db); + sscanf(argv[5],"%d",&avgnumber); + + output2 = malloc(sizeof(float) * fft_size); + + add_db -= 10.0*log10(avgnumber); + } else { + if(argc>=5) + { + window=firdes_get_window_from_string(argv[4]); + } + if(argc>=6) + { + benchmark|=!strcmp("--benchmark",argv[5]); + octave|=!strcmp("--octave",argv[5]); + } + if(argc>=7) + { + benchmark|=!strcmp("--benchmark",argv[6]); + octave|=!strcmp("--octave",argv[6]); + } } if(!initialize_buffers()) return -2; @@ -1353,6 +1370,20 @@ int main(int argc, char *argv[]) //apply_window_c(input,windowed,fft_size,window); apply_precalculated_window_c(input,windowed,fft_size,windowt); fft_execute(plan); + if(combined_logavg) { + int i; + accumulate_power_cf((complexf*)output, output2, fft_size); + n_averaged++; + if(n_averaged >= avgnumber) { + log_ff(output2, output2, fft_size, add_db); + fwrite (output2, sizeof(float), fft_size, stdout); + TRY_YIELD; + for(i = 0; i < fft_size; i++) { + output2[i] = 0; + } + n_averaged = 0; + } + } else if(octave) { printf("fftdata=["); @@ -2160,7 +2191,7 @@ int main(int argc, char *argv[]) else cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); fwrite(output_buffer, sizeof(complexf), outsize, stdout); - //fflush(stdout); + fflush(stdout); // advance pointer: readpoint_bufs++; @@ -2169,8 +2200,10 @@ int main(int argc, char *argv[]) ssize_t newdelay = -1; usleep(50000); read_fifo_ctl(fd,"%g %zd\n",&rate, &newdelay); - if(newdelay >= 0 && newdelay < bufsize_bytes) + if(newdelay >= 0 && newdelay < bufsize_bytes) { extradelay = newdelay / insize_bytes; + fprintf(stderr, "delaying: %zd %zd\n", extradelay, newdelay); + } writepoint_bufs = (*shm_p) / insize_bytes; } From 41099fe3b97e2e5232cd26b91821db2679863084 Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Thu, 4 May 2017 00:28:51 +0300 Subject: [PATCH 08/10] More experimental kludge: combine type conversion in fft_cc_logavg to reduce piping overhead --- csdr.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/csdr.c b/csdr.c index 595e68b8..3babd3cc 100644 --- a/csdr.c +++ b/csdr.c @@ -1306,6 +1306,8 @@ int main(int argc, char *argv[]) int octave=0; window_t window = WINDOW_DEFAULT; + int int_in=0; + int16_t *input_int; // for averaging: float add_db=0; int avgnumber=0, n_averaged=0; @@ -1319,6 +1321,8 @@ int main(int argc, char *argv[]) output2 = malloc(sizeof(float) * fft_size); add_db -= 10.0*log10(avgnumber); + if(argc >= 7 && !strcmp(argv[6], "--cs16_in")) + int_in = 1; } else { if(argc>=5) { @@ -1349,23 +1353,37 @@ int main(int argc, char *argv[]) if(octave) printf("setenv(\"GNUTERM\",\"X11 noraise\");y=zeros(1,%d);semilogy(y,\"ydatasource\",\"y\");\n",fft_size); float *windowt; windowt = precalculate_window(fft_size, window); + size_t sizeof_in = sizeof(complexf); + if(int_in) { + sizeof_in = 2*sizeof(int16_t); + input_int = (int16_t*)malloc(sizeof(int16_t)*fft_size*2); + } for(;;) { FEOF_CHECK; if(every_n_samples>fft_size) { - fread(input, sizeof(complexf), fft_size, stdin); + fread(int_in ?(void*)input_int :(void*)input, sizeof_in, fft_size, stdin); + if(int_in) + convert_i16_f(input_int, (float*)input, 2*fft_size); + //skipping samples before next FFT (but fseek doesn't work for pipes) for(int seek_remain=every_n_samples-fft_size;seek_remain>0;seek_remain-=the_bufsize) { - fread(temp_f, sizeof(complexf), MIN_M(the_bufsize,seek_remain), stdin); + fread(temp_f, sizeof_in, MIN_M(the_bufsize,seek_remain), stdin); } } else { //overlapped FFT for(int i=0;i Date: Tue, 9 May 2017 20:39:52 +0300 Subject: [PATCH 09/10] Added shm_cicddc_cu8_c --- csdr.c | 13 ++++++++---- libcsdr.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ libcsdr.h | 1 + 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/csdr.c b/csdr.c index 3babd3cc..dbbcdb0f 100644 --- a/csdr.c +++ b/csdr.c @@ -2091,7 +2091,7 @@ int main(int argc, char *argv[]) } } - int complex_cic; + {int complex_cic, cu8_cic; if((!strcmp(argv[1],"cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"cicddc_cs16_c"))) { float rate=0; int fd; @@ -2136,7 +2136,10 @@ int main(int argc, char *argv[]) cicddc_free(state); } - if((!strcmp(argv[1],"shm_cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"shm_cicddc_cs16_c"))) { + /* For now, complex uint8 is handled similarly to real int16 because the size is the same. + (Two 8-bit numbers in one "int16_t") */ + if((!strcmp(argv[1],"shm_cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"shm_cicddc_cs16_c")) + | (cu8_cic=!strcmp(argv[1],"shm_cicddc_cu8_c"))) { float rate=0; int fd, shm_fd; int factor=0, insize, outsize; @@ -2204,7 +2207,9 @@ int main(int argc, char *argv[]) (at least on the server I tested it on). */ memcpy(input_buffer, shm_buf + insize_bytes * readpoint_bufs1, insize_bytes); - if(complex_cic) + if(cu8_cic) + cicddc_cu8_c(state, (uint8_t*)input_buffer, output_buffer, outsize, rate); + else if(complex_cic) cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); else cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); @@ -2228,7 +2233,7 @@ int main(int argc, char *argv[]) /*TRY_YIELD;*/ } cicddc_free(state); - } + }} if(!strcmp(argv[1],"none")) { diff --git a/libcsdr.c b/libcsdr.c index 87b7f87f..82e21895 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -669,6 +669,68 @@ void cicddc_cs16_c(void *state, int16_t *input, complexf *output, int outsize, f } +/* This is almost copy paste from cicddc_cs16_c. + I'm afraid this is going to be annoying to maintain... */ +void cicddc_cu8_c(void *state, uint8_t *input, complexf *output, int outsize, float rate) { + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + uint8_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int32_t m_a, m_b, m_c, m_d; + int sinep = phase >> (64-SINESHIFT); + // subtract 127.4 (good for rtl-sdr) + m_a = (((int32_t)inp[2*i]) << 8) - 32614; + m_b = (((int32_t)inp[2*i+1]) << 8) - 32614; + m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + m_d = (int32_t)sinetable[sinep]; + // complex multiplication: + in_a = m_a*m_c - m_b*m_d; + in_b = m_a*m_d + m_b*m_c; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += 2*factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; +} + + + + + /* __ __ _ _ _ _ /\ | \/ | | | | | | | | | diff --git a/libcsdr.h b/libcsdr.h index 6604c54b..d6ab855c 100644 --- a/libcsdr.h +++ b/libcsdr.h @@ -192,3 +192,4 @@ void *cicddc_init(int factor); void cicddc_free(void *state); void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, float rate); void cicddc_cs16_c(void *state, int16_t *input, complexf *output, int outsize, float rate); +void cicddc_cu8_c(void *state, uint8_t *input, complexf *output, int outsize, float rate); From f474d09f5b244780d85283808f4dae85bea49820 Mon Sep 17 00:00:00 2001 From: Tatu Peltola Date: Mon, 30 Oct 2017 20:15:40 +0000 Subject: [PATCH 10/10] Change tabs to 4 spaces to be consistent with current version of csdr --- csdr.c | 382 +++++++++++++++++++++++++++--------------------------- libcsdr.c | 356 +++++++++++++++++++++++++------------------------- 2 files changed, 369 insertions(+), 369 deletions(-) diff --git a/csdr.c b/csdr.c index 893acaa7..ece7f3ca 100755 --- a/csdr.c +++ b/csdr.c @@ -262,7 +262,7 @@ int init_fifo(int argc, char *argv[]) int i; for(i = 2; i < argc-1; i++) { char *arg_name = argv[i], *arg_param = argv[i+1]; - if(!strcmp(arg_name,"--fifo")) + if(!strcmp(arg_name,"--fifo")) { errhead(); fprintf(stderr,"fifo control mode on\n"); int fd = open(arg_param, O_RDONLY); @@ -1575,8 +1575,8 @@ int main(int argc, char *argv[]) } } - int combined_logavg; - if((!strcmp(argv[1],"fft_cc")) || (combined_logavg = !strcmp(argv[1],"fft_cc_logavg")) ) + int combined_logavg; + if((!strcmp(argv[1],"fft_cc")) || (combined_logavg = !strcmp(argv[1],"fft_cc_logavg")) ) { if(argc<=3) return badsyntax("need required parameters (fft_size, out_of_every_n_samples)"); int fft_size; @@ -1588,24 +1588,24 @@ int main(int argc, char *argv[]) int octave=0; window_t window = WINDOW_DEFAULT; - int int_in=0; - int16_t *input_int; - // for averaging: - float add_db=0; - int avgnumber=0, n_averaged=0; - float *output2; - if(combined_logavg) { - if(argc<=5) return badsyntax("need required parameters (fft_size, out_of_every_n_samples, add_db, avgnumber)"); + int int_in=0; + int16_t *input_int; + // for averaging: + float add_db=0; + int avgnumber=0, n_averaged=0; + float *output2; + if(combined_logavg) { + if(argc<=5) return badsyntax("need required parameters (fft_size, out_of_every_n_samples, add_db, avgnumber)"); - sscanf(argv[4],"%g",&add_db); - sscanf(argv[5],"%d",&avgnumber); + sscanf(argv[4],"%g",&add_db); + sscanf(argv[5],"%d",&avgnumber); - output2 = malloc(sizeof(float) * fft_size); + output2 = malloc(sizeof(float) * fft_size); - add_db -= 10.0*log10(avgnumber); - if(argc >= 7 && !strcmp(argv[6], "--cs16_in")) - int_in = 1; - } else { + add_db -= 10.0*log10(avgnumber); + if(argc >= 7 && !strcmp(argv[6], "--cs16_in")) + int_in = 1; + } else { if(argc>=5) { window=firdes_get_window_from_string(argv[4]); @@ -1620,7 +1620,7 @@ int main(int argc, char *argv[]) benchmark|=!strcmp("--benchmark",argv[6]); octave|=!strcmp("--octave",argv[6]); } - } + } if(!initialize_buffers()) return -2; sendbufsize(fft_size); @@ -1635,55 +1635,55 @@ int main(int argc, char *argv[]) if(octave) printf("setenv(\"GNUTERM\",\"X11 noraise\");y=zeros(1,%d);semilogy(y,\"ydatasource\",\"y\");\n",fft_size); float *windowt; windowt = precalculate_window(fft_size, window); - size_t sizeof_in = sizeof(complexf); - if(int_in) { - sizeof_in = 2*sizeof(int16_t); - input_int = (int16_t*)malloc(sizeof(int16_t)*fft_size*2); - } + size_t sizeof_in = sizeof(complexf); + if(int_in) { + sizeof_in = 2*sizeof(int16_t); + input_int = (int16_t*)malloc(sizeof(int16_t)*fft_size*2); + } for(;;) { FEOF_CHECK; if(every_n_samples>fft_size) { - fread(int_in ?(void*)input_int :(void*)input, sizeof_in, fft_size, stdin); - if(int_in) - convert_i16_f(input_int, (float*)input, 2*fft_size); + fread(int_in ?(void*)input_int :(void*)input, sizeof_in, fft_size, stdin); + if(int_in) + convert_i16_f(input_int, (float*)input, 2*fft_size); //skipping samples before next FFT (but fseek doesn't work for pipes) for(int seek_remain=every_n_samples-fft_size;seek_remain>0;seek_remain-=the_bufsize) { - fread(temp_f, sizeof_in, MIN_M(the_bufsize,seek_remain), stdin); + fread(temp_f, sizeof_in, MIN_M(the_bufsize,seek_remain), stdin); } } else { //overlapped FFT for(int i=0;i= avgnumber) { - log_ff(output2, output2, fft_size, add_db); - fwrite (output2, sizeof(float), fft_size, stdout); - TRY_YIELD; - for(i = 0; i < fft_size; i++) { - output2[i] = 0; - } - n_averaged = 0; - } - } else + if(combined_logavg) { + int i; + accumulate_power_cf((complexf*)output, output2, fft_size); + n_averaged++; + if(n_averaged >= avgnumber) { + log_ff(output2, output2, fft_size, add_db); + fwrite (output2, sizeof(float), fft_size, stdout); + TRY_YIELD; + for(i = 0; i < fft_size; i++) { + output2[i] = 0; + } + n_averaged = 0; + } + } else if(octave) { printf("fftdata=["); @@ -3662,149 +3662,149 @@ int main(int argc, char *argv[]) \____|___\____| |____/|____/ \____| */ - {int complex_cic, cu8_cic; - if((!strcmp(argv[1],"cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"cicddc_cs16_c"))) { - float rate=0; - int fd; - int factor=0, insize, outsize; - bigbufs = 1; - - if(argc<=2) return badsyntax("need required parameter(s) (decimation factor, [rate])"); - sscanf(argv[2],"%d",&factor); - if(fd=init_fifo(argc,argv)) - { - while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); - } - else - { - if(argc<=3) return badsyntax("need required parameters (decimation factor, rate)"); - sscanf(argv[3],"%g",&rate); - } - - the_bufsize = getbufsize(); - outsize = the_bufsize / factor; - insize = outsize * factor; // make it integer multiple of factor - sendbufsize(outsize); - if(complex_cic) insize *= 2; - - int16_t *input_buffer = malloc(sizeof(int16_t) * insize); - complexf *output_buffer = malloc(sizeof(complexf) * outsize); - - void *state = cicddc_init(factor); - for(;;) - { - FEOF_CHECK; - fread(input_buffer, sizeof(int16_t), insize, stdin); - if(complex_cic) - cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); - else - cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); - fwrite(output_buffer, sizeof(complexf), outsize, stdout); - fflush(stdout); - read_fifo_ctl(fd,"%g\n",&rate); - TRY_YIELD; - } - cicddc_free(state); - } - - /* For now, complex uint8 is handled similarly to real int16 because the size is the same. - (Two 8-bit numbers in one "int16_t") */ - if((!strcmp(argv[1],"shm_cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"shm_cicddc_cs16_c")) - | (cu8_cic=!strcmp(argv[1],"shm_cicddc_cu8_c"))) { - float rate=0; - int fd, shm_fd; - int factor=0, insize, outsize; - //bigbufs = 1; - - if(argc<=3) return badsyntax("need required parameter(s) (shm name, decimation factor, [rate])"); - sscanf(argv[3],"%d",&factor); - if(fd=init_fifo(argc,argv)) - { - while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); - } - else - { - if(argc<=4) return badsyntax("need required parameters (shm name, decimation factor, rate)"); - sscanf(argv[4],"%g",&rate); - } - - // some code from shmread: - void *shm_buf; - size_t *shm_p; - size_t readpoint_bufs = 0, writepoint_bufs = 0, bufsize_bufs; - size_t shm_size, bufsize_bytes, insize_bytes, prevp; - struct stat shm_stat; - shm_fd = shm_open(argv[2], O_RDONLY, 0644); - if(shm_fd < 0) { perror("shm_open failed"); return 1; } - if(fstat(shm_fd, &shm_stat) < 0) { perror("fstat failed"); return 1; } - shm_size = shm_stat.st_size; - bufsize_bytes = shm_size - sizeof(size_t); - shm_buf = mmap(0, shm_size, PROT_READ, MAP_SHARED, shm_fd, 0); - if(shm_buf == MAP_FAILED) { perror("mmap failed"); return 1; } - shm_p = shm_buf + bufsize_bytes; - prevp = *shm_p; - if(prevp >= bufsize_bytes) return badsyntax("bad pointer value in shm buffer"); - - outsize = 0x400; - insize = outsize * factor; // make it integer multiple of factor - sendbufsize(outsize); - if(complex_cic) insize *= 2; - - insize_bytes = sizeof(int16_t) * insize; - /* We don't need modulo or memory mapping tricks if wrap-around occurs at input block boundary. - This needs the SHM buffer size to be a multiple of insize. */ - if(bufsize_bytes % insize_bytes != 0) return badsyntax("SHM size should be multiple of CIC processing block size"); - - bufsize_bufs = bufsize_bytes / insize_bytes; - writepoint_bufs = readpoint_bufs = prevp / insize_bytes; - - int16_t *input_buffer = malloc(sizeof(int16_t) * insize); - complexf *output_buffer = malloc(sizeof(complexf) * outsize); - - void *state = cicddc_init(factor); - size_t extradelay = 0; - for(;;) - { - //fprintf(stderr, "%d %d\n", writepoint_bufs, readpoint_bufs); - if(writepoint_bufs != readpoint_bufs) { - int r; - ssize_t readpoint_bufs1 = (ssize_t)readpoint_bufs - extradelay; - while(readpoint_bufs1 < 0) readpoint_bufs1 += bufsize_bufs; - - //input_buffer = (int16_t*)(shm_buf + insize_bytes * readpoint_bufs1); - - /* It seems strange but memcpying small blocks and reading from there - is faster than reading directly from the SHM buffer during processing - (at least on the server I tested it on). */ - memcpy(input_buffer, shm_buf + insize_bytes * readpoint_bufs1, insize_bytes); - - if(cu8_cic) - cicddc_cu8_c(state, (uint8_t*)input_buffer, output_buffer, outsize, rate); - else if(complex_cic) - cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); - else - cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); - fwrite(output_buffer, sizeof(complexf), outsize, stdout); - fflush(stdout); - - // advance pointer: - readpoint_bufs++; - if(readpoint_bufs >= bufsize_bufs) readpoint_bufs = 0; - } else { - ssize_t newdelay = -1; - usleep(50000); - read_fifo_ctl(fd,"%g %zd\n",&rate, &newdelay); - if(newdelay >= 0 && newdelay < bufsize_bytes) { - extradelay = newdelay / insize_bytes; - fprintf(stderr, "delaying: %zd %zd\n", extradelay, newdelay); - } - - writepoint_bufs = (*shm_p) / insize_bytes; - } - /*TRY_YIELD;*/ - } - cicddc_free(state); - }} + {int complex_cic, cu8_cic; + if((!strcmp(argv[1],"cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"cicddc_cs16_c"))) { + float rate=0; + int fd; + int factor=0, insize, outsize; + bigbufs = 1; + + if(argc<=2) return badsyntax("need required parameter(s) (decimation factor, [rate])"); + sscanf(argv[2],"%d",&factor); + if(fd=init_fifo(argc,argv)) + { + while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); + } + else + { + if(argc<=3) return badsyntax("need required parameters (decimation factor, rate)"); + sscanf(argv[3],"%g",&rate); + } + + the_bufsize = getbufsize(); + outsize = the_bufsize / factor; + insize = outsize * factor; // make it integer multiple of factor + sendbufsize(outsize); + if(complex_cic) insize *= 2; + + int16_t *input_buffer = malloc(sizeof(int16_t) * insize); + complexf *output_buffer = malloc(sizeof(complexf) * outsize); + + void *state = cicddc_init(factor); + for(;;) + { + FEOF_CHECK; + fread(input_buffer, sizeof(int16_t), insize, stdin); + if(complex_cic) + cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); + else + cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); + fwrite(output_buffer, sizeof(complexf), outsize, stdout); + fflush(stdout); + read_fifo_ctl(fd,"%g\n",&rate); + TRY_YIELD; + } + cicddc_free(state); + } + + /* For now, complex uint8 is handled similarly to real int16 because the size is the same. + (Two 8-bit numbers in one "int16_t") */ + if((!strcmp(argv[1],"shm_cicddc_s16_c")) | (complex_cic=!strcmp(argv[1],"shm_cicddc_cs16_c")) + | (cu8_cic=!strcmp(argv[1],"shm_cicddc_cu8_c"))) { + float rate=0; + int fd, shm_fd; + int factor=0, insize, outsize; + //bigbufs = 1; + + if(argc<=3) return badsyntax("need required parameter(s) (shm name, decimation factor, [rate])"); + sscanf(argv[3],"%d",&factor); + if(fd=init_fifo(argc,argv)) + { + while(!read_fifo_ctl(fd,"%g\n",&rate)) usleep(10000); + } + else + { + if(argc<=4) return badsyntax("need required parameters (shm name, decimation factor, rate)"); + sscanf(argv[4],"%g",&rate); + } + + // some code from shmread: + void *shm_buf; + size_t *shm_p; + size_t readpoint_bufs = 0, writepoint_bufs = 0, bufsize_bufs; + size_t shm_size, bufsize_bytes, insize_bytes, prevp; + struct stat shm_stat; + shm_fd = shm_open(argv[2], O_RDONLY, 0644); + if(shm_fd < 0) { perror("shm_open failed"); return 1; } + if(fstat(shm_fd, &shm_stat) < 0) { perror("fstat failed"); return 1; } + shm_size = shm_stat.st_size; + bufsize_bytes = shm_size - sizeof(size_t); + shm_buf = mmap(0, shm_size, PROT_READ, MAP_SHARED, shm_fd, 0); + if(shm_buf == MAP_FAILED) { perror("mmap failed"); return 1; } + shm_p = shm_buf + bufsize_bytes; + prevp = *shm_p; + if(prevp >= bufsize_bytes) return badsyntax("bad pointer value in shm buffer"); + + outsize = 0x400; + insize = outsize * factor; // make it integer multiple of factor + sendbufsize(outsize); + if(complex_cic) insize *= 2; + + insize_bytes = sizeof(int16_t) * insize; + /* We don't need modulo or memory mapping tricks if wrap-around occurs at input block boundary. + This needs the SHM buffer size to be a multiple of insize. */ + if(bufsize_bytes % insize_bytes != 0) return badsyntax("SHM size should be multiple of CIC processing block size"); + + bufsize_bufs = bufsize_bytes / insize_bytes; + writepoint_bufs = readpoint_bufs = prevp / insize_bytes; + + int16_t *input_buffer = malloc(sizeof(int16_t) * insize); + complexf *output_buffer = malloc(sizeof(complexf) * outsize); + + void *state = cicddc_init(factor); + size_t extradelay = 0; + for(;;) + { + //fprintf(stderr, "%d %d\n", writepoint_bufs, readpoint_bufs); + if(writepoint_bufs != readpoint_bufs) { + int r; + ssize_t readpoint_bufs1 = (ssize_t)readpoint_bufs - extradelay; + while(readpoint_bufs1 < 0) readpoint_bufs1 += bufsize_bufs; + + //input_buffer = (int16_t*)(shm_buf + insize_bytes * readpoint_bufs1); + + /* It seems strange but memcpying small blocks and reading from there + is faster than reading directly from the SHM buffer during processing + (at least on the server I tested it on). */ + memcpy(input_buffer, shm_buf + insize_bytes * readpoint_bufs1, insize_bytes); + + if(cu8_cic) + cicddc_cu8_c(state, (uint8_t*)input_buffer, output_buffer, outsize, rate); + else if(complex_cic) + cicddc_cs16_c(state, input_buffer, output_buffer, outsize, rate); + else + cicddc_s16_c(state, input_buffer, output_buffer, outsize, rate); + fwrite(output_buffer, sizeof(complexf), outsize, stdout); + fflush(stdout); + + // advance pointer: + readpoint_bufs++; + if(readpoint_bufs >= bufsize_bufs) readpoint_bufs = 0; + } else { + ssize_t newdelay = -1; + usleep(50000); + read_fifo_ctl(fd,"%g %zd\n",&rate, &newdelay); + if(newdelay >= 0 && newdelay < bufsize_bytes) { + extradelay = newdelay / insize_bytes; + fprintf(stderr, "delaying: %zd %zd\n", extradelay, newdelay); + } + + writepoint_bufs = (*shm_p) / insize_bytes; + } + /*TRY_YIELD;*/ + } + cicddc_free(state); + }} if(!strcmp(argv[1],"none")) { diff --git a/libcsdr.c b/libcsdr.c index 7a111bae..3ee1774d 100755 --- a/libcsdr.c +++ b/libcsdr.c @@ -854,200 +854,200 @@ void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps #define SINESIZE (1<factor = factor; - s->gain = 1.0f / SHRT_MAX / sineamp / factor / factor / factor; // compensate for gain of 3 integrators + float sineamp = 32767.0f; + s->factor = factor; + s->gain = 1.0f / SHRT_MAX / sineamp / factor / factor / factor; // compensate for gain of 3 integrators - s->sinetable = malloc(sinesize2 * sizeof(*s->sinetable)); - double f = 2.0*M_PI / (double)SINESIZE; - for(i = 0; i < sinesize2; i++) { - s->sinetable[i] = sineamp * cos(f * i); - } - return s; + s->sinetable = malloc(sinesize2 * sizeof(*s->sinetable)); + double f = 2.0*M_PI / (double)SINESIZE; + for(i = 0; i < sinesize2; i++) { + s->sinetable[i] = sineamp * cos(f * i); + } + return s; } void cicddc_free(void *state) { - cicddc_t *s = state; - free(s->sinetable); - free(s); + cicddc_t *s = state; + free(s->sinetable); + free(s); } void cicddc_s16_c(void *state, int16_t *input, complexf *output, int outsize, float rate) { - cicddc_t *s = state; - int k; - int factor = s->factor; - cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; - cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; - uint64_t phase = s->phase, freq; - int16_t *sinetable = s->sinetable; - float gain = s->gain; - - freq = rate * ((float)(1ULL << 63) * 2); - - int16_t *inp = input; - for(k = 0; k < outsize; k++) { - int i; - cic_dt out0a, out0b, out1a, out1b; - cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum - for(i = 0; i < factor; i++) { - cic_dt in_a, in_b; - int sinep = phase >> (64-SINESHIFT); - in_a = (int32_t)inp[i] * (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; - in_b = (int32_t)inp[i] * (int32_t)sinetable[sinep]; - phase += freq; - /* integrators: - The calculations are ordered so that each integrator - takes a result from previous loop iteration - to make the code more "pipeline-friendly". */ - ig2a += ig1a; ig2b += ig1b; - ig1a += ig0a; ig1b += ig0b; - ig0a += in_a; ig0b += in_b; - } - inp += factor; - // comb filters: - out0a = ig2a - comb0a; out0b = ig2b - comb0b; - comb0a = ig2a; comb0b = ig2b; - out1a = out0a - comb1a; out1b = out0b - comb1b; - comb1a = out0a; comb1b = out0b; - - output[k].i = (float)out1a * gain; - output[k].q = (float)out1b * gain; - } - - s->ig0a = ig0a; s->ig0b = ig0b; - s->ig1a = ig1a; s->ig1b = ig1b; - s->comb0a = comb0a; s->comb0b = comb0b; - s->comb1a = comb1a; s->comb1b = comb1b; - s->phase = phase; + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + int16_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int sinep = phase >> (64-SINESHIFT); + in_a = (int32_t)inp[i] * (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + in_b = (int32_t)inp[i] * (int32_t)sinetable[sinep]; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; } void cicddc_cs16_c(void *state, int16_t *input, complexf *output, int outsize, float rate) { - cicddc_t *s = state; - int k; - int factor = s->factor; - cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; - cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; - uint64_t phase = s->phase, freq; - int16_t *sinetable = s->sinetable; - float gain = s->gain; - - freq = rate * ((float)(1ULL << 63) * 2); - - int16_t *inp = input; - for(k = 0; k < outsize; k++) { - int i; - cic_dt out0a, out0b, out1a, out1b; - cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum - for(i = 0; i < factor; i++) { - cic_dt in_a, in_b; - int32_t m_a, m_b, m_c, m_d; - int sinep = phase >> (64-SINESHIFT); - m_a = inp[2*i]; - m_b = inp[2*i+1]; - m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; - m_d = (int32_t)sinetable[sinep]; - // complex multiplication: - in_a = m_a*m_c - m_b*m_d; - in_b = m_a*m_d + m_b*m_c; - phase += freq; - /* integrators: - The calculations are ordered so that each integrator - takes a result from previous loop iteration - to make the code more "pipeline-friendly". */ - ig2a += ig1a; ig2b += ig1b; - ig1a += ig0a; ig1b += ig0b; - ig0a += in_a; ig0b += in_b; - } - inp += 2*factor; - // comb filters: - out0a = ig2a - comb0a; out0b = ig2b - comb0b; - comb0a = ig2a; comb0b = ig2b; - out1a = out0a - comb1a; out1b = out0b - comb1b; - comb1a = out0a; comb1b = out0b; - - output[k].i = (float)out1a * gain; - output[k].q = (float)out1b * gain; - } - - s->ig0a = ig0a; s->ig0b = ig0b; - s->ig1a = ig1a; s->ig1b = ig1b; - s->comb0a = comb0a; s->comb0b = comb0b; - s->comb1a = comb1a; s->comb1b = comb1b; - s->phase = phase; + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + int16_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int32_t m_a, m_b, m_c, m_d; + int sinep = phase >> (64-SINESHIFT); + m_a = inp[2*i]; + m_b = inp[2*i+1]; + m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + m_d = (int32_t)sinetable[sinep]; + // complex multiplication: + in_a = m_a*m_c - m_b*m_d; + in_b = m_a*m_d + m_b*m_c; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += 2*factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; } /* This is almost copy paste from cicddc_cs16_c. I'm afraid this is going to be annoying to maintain... */ void cicddc_cu8_c(void *state, uint8_t *input, complexf *output, int outsize, float rate) { - cicddc_t *s = state; - int k; - int factor = s->factor; - cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; - cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; - uint64_t phase = s->phase, freq; - int16_t *sinetable = s->sinetable; - float gain = s->gain; - - freq = rate * ((float)(1ULL << 63) * 2); - - uint8_t *inp = input; - for(k = 0; k < outsize; k++) { - int i; - cic_dt out0a, out0b, out1a, out1b; - cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum - for(i = 0; i < factor; i++) { - cic_dt in_a, in_b; - int32_t m_a, m_b, m_c, m_d; - int sinep = phase >> (64-SINESHIFT); - // subtract 127.4 (good for rtl-sdr) - m_a = (((int32_t)inp[2*i]) << 8) - 32614; - m_b = (((int32_t)inp[2*i+1]) << 8) - 32614; - m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; - m_d = (int32_t)sinetable[sinep]; - // complex multiplication: - in_a = m_a*m_c - m_b*m_d; - in_b = m_a*m_d + m_b*m_c; - phase += freq; - /* integrators: - The calculations are ordered so that each integrator - takes a result from previous loop iteration - to make the code more "pipeline-friendly". */ - ig2a += ig1a; ig2b += ig1b; - ig1a += ig0a; ig1b += ig0b; - ig0a += in_a; ig0b += in_b; - } - inp += 2*factor; - // comb filters: - out0a = ig2a - comb0a; out0b = ig2b - comb0b; - comb0a = ig2a; comb0b = ig2b; - out1a = out0a - comb1a; out1b = out0b - comb1b; - comb1a = out0a; comb1b = out0b; - - output[k].i = (float)out1a * gain; - output[k].q = (float)out1b * gain; - } - - s->ig0a = ig0a; s->ig0b = ig0b; - s->ig1a = ig1a; s->ig1b = ig1b; - s->comb0a = comb0a; s->comb0b = comb0b; - s->comb1a = comb1a; s->comb1b = comb1b; - s->phase = phase; + cicddc_t *s = state; + int k; + int factor = s->factor; + cic_dt ig0a = s->ig0a, ig0b = s->ig0b, ig1a = s->ig1a, ig1b = s->ig1b; + cic_dt comb0a = s->comb0a, comb0b = s->comb0b, comb1a = s->comb1a, comb1b = s->comb1b; + uint64_t phase = s->phase, freq; + int16_t *sinetable = s->sinetable; + float gain = s->gain; + + freq = rate * ((float)(1ULL << 63) * 2); + + uint8_t *inp = input; + for(k = 0; k < outsize; k++) { + int i; + cic_dt out0a, out0b, out1a, out1b; + cic_dt ig2a = 0, ig2b = 0; // last integrator and first comb replaced simply by sum + for(i = 0; i < factor; i++) { + cic_dt in_a, in_b; + int32_t m_a, m_b, m_c, m_d; + int sinep = phase >> (64-SINESHIFT); + // subtract 127.4 (good for rtl-sdr) + m_a = (((int32_t)inp[2*i]) << 8) - 32614; + m_b = (((int32_t)inp[2*i+1]) << 8) - 32614; + m_c = (int32_t)sinetable[sinep + (1<<(SINESHIFT-2))]; + m_d = (int32_t)sinetable[sinep]; + // complex multiplication: + in_a = m_a*m_c - m_b*m_d; + in_b = m_a*m_d + m_b*m_c; + phase += freq; + /* integrators: + The calculations are ordered so that each integrator + takes a result from previous loop iteration + to make the code more "pipeline-friendly". */ + ig2a += ig1a; ig2b += ig1b; + ig1a += ig0a; ig1b += ig0b; + ig0a += in_a; ig0b += in_b; + } + inp += 2*factor; + // comb filters: + out0a = ig2a - comb0a; out0b = ig2b - comb0b; + comb0a = ig2a; comb0b = ig2b; + out1a = out0a - comb1a; out1b = out0b - comb1b; + comb1a = out0a; comb1b = out0b; + + output[k].i = (float)out1a * gain; + output[k].q = (float)out1b * gain; + } + + s->ig0a = ig0a; s->ig0b = ig0b; + s->ig1a = ig1a; s->ig1b = ig1b; + s->comb0a = comb0a; s->comb0b = comb0b; + s->comb1a = comb1a; s->comb1b = comb1b; + s->phase = phase; } @@ -1483,10 +1483,10 @@ void apply_precalculated_window_c(complexf* input, complexf* output, int size, f void apply_precalculated_window_f(float* input, float* output, int size, float *windowt) { - for(int i=0;i