From c2b27cf20ed37e955656dbf26813d8d2b3f21378 Mon Sep 17 00:00:00 2001
From: wijay
Date: Fri, 18 Oct 2019 11:40:39 -0500
Subject: [PATCH] Add new gvar type for postgres

---
 local_db/postgres_gvar/Makefile    |  35 ++++
 local_db/postgres_gvar/gvar.c      | 263 +++++++++++++++++++++++++++++
 local_db/postgres_gvar/gvar.source | 218 ++++++++++++++++++++++++
 3 files changed, 516 insertions(+)
 create mode 100644 local_db/postgres_gvar/Makefile
 create mode 100644 local_db/postgres_gvar/gvar.c
 create mode 100644 local_db/postgres_gvar/gvar.source

diff --git a/local_db/postgres_gvar/Makefile b/local_db/postgres_gvar/Makefile
new file mode 100644
index 0000000..36498f2
--- /dev/null
+++ b/local_db/postgres_gvar/Makefile
@@ -0,0 +1,35 @@
+#-------------------------------------------------------------------------
+# Author: William Jay (wjay@fnal.gov)
+# Adapted from
+# https://github.com/postgres/postgres/blob/master/src/tutorial/Makefile
+# Makefile--
+#    Makefile for the gvar PostgreSQL type (adapted from the tutorial Makefile)
+#
+# By default, this builds against an existing PostgreSQL installation
+# (the one identified by whichever pg_config is first in your path).
+# Within a configured source tree, you can say "make NO_PGXS=1 all"
+# to build using the surrounding source tree.
+#
+# IDENTIFICATION
+#    local_db/postgres_gvar/Makefile
+#
+#-------------------------------------------------------------------------
+# gvar is the C module the SQL script loads; gvar.sql is generated from gvar.source (see the %.sql rule).
+MODULES = gvar
+DATA_built = gvar.sql
+# In-tree build with NO_PGXS=1; otherwise PGXS is located through pg_config (override: make PG_CONFIG=/path/to/pg_config).
+ifdef NO_PGXS
+subdir = src/tutorial
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/src/makefiles/pgxs.mk
+else
+PG_CONFIG ?= pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+endif
+# Generate the SQL script, replacing every _OBJWD_ placeholder with the current directory.
+%.sql: %.source
+	rm -f $@; \
+	C=`pwd`; \
+	sed -e "s:_OBJWD_:$$C:g" < $< > $@
\ No newline at end of file
diff --git a/local_db/postgres_gvar/gvar.c b/local_db/postgres_gvar/gvar.c
new file mode 100644
index 0000000..3a760b7
--- /dev/null
+++ b/local_db/postgres_gvar/gvar.c
@@ -0,0 +1,263 @@
+/* Author: William Jay (wjay@fnal.gov)
+ * Adapted from
+ * https://github.com/postgres/postgres/blob/master/src/tutorial/complex.c
+ *
+ *
+ ******************************************************************************
+  This file contains routines that can be bound to a Postgres backend and
+  called by the backend in the process of processing queries. The calling
+  format for these routines is dictated by Postgres architecture.
+******************************************************************************/
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "libpq/pqformat.h" /* needed for send/recv functions */
+#include <math.h> /* needed for sqrt */
+
+PG_MODULE_MAGIC;
+/* Internal representation of a gvar: a central value (mean) and its error (sdev), two doubles. */
+typedef struct Gvar
+{
+    double      mean;
+    double      sdev;
+} Gvar;
+
+
+/*****************************************************************************
+ * Input/Output functions
+ *****************************************************************************/
+
+PG_FUNCTION_INFO_V1(gvar_in);
+/* Text input: parse "(mean,sdev)" into a newly palloc'd Gvar; text that does not parse raises an error. */
+Datum
+gvar_in(PG_FUNCTION_ARGS)
+{
+    char       *str = PG_GETARG_CSTRING(0);
+    double      mean, sdev;
+    Gvar       *result;
+    /* NOTE: sscanf reports 2 as soon as both numbers convert, so a missing ')' or trailing text is accepted. */
+    if (sscanf(str, " ( %lf , %lf )", &mean, &sdev) != 2)
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("invalid input syntax for type %s: \"%s\"",
+                        "gvar", str)));
+
+    result = (Gvar *) palloc(sizeof(Gvar));
+    result->mean = mean;
+    result->sdev = sdev;
+    PG_RETURN_POINTER(result);
+}
+
+PG_FUNCTION_INFO_V1(gvar_out);
+/* Text output: format a Gvar as "(mean,sdev)" in a palloc'd C string. */
+Datum
+gvar_out(PG_FUNCTION_ARGS)
+{
+    Gvar       *gvar = (Gvar *) PG_GETARG_POINTER(0);
+    char       *result;
+    /* %.15g (DBL_DIG digits): plain %g would cut both fields to 6 significant digits on output. */
+    result = psprintf("(%.15g,%.15g)", gvar->mean, gvar->sdev);
+    PG_RETURN_CSTRING(result);
+}
+
+/*****************************************************************************
+ * Binary Input/Output functions
+ *
+ * These are optional.
+ *****************************************************************************/
+
+PG_FUNCTION_INFO_V1(gvar_recv);
+/* Binary input: read mean, then sdev, as float8 values from the message buffer. */
+Datum
+gvar_recv(PG_FUNCTION_ARGS)
+{
+    StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
+    Gvar       *result;
+
+    result = (Gvar *) palloc(sizeof(Gvar));
+    result->mean = pq_getmsgfloat8(buf);
+    result->sdev = pq_getmsgfloat8(buf);
+    PG_RETURN_POINTER(result);
+}
+
+PG_FUNCTION_INFO_V1(gvar_send);
+/* Binary output: write mean, then sdev, as float8 values (same order gvar_recv reads them). */
+Datum
+gvar_send(PG_FUNCTION_ARGS)
+{
+    Gvar       *gvar = (Gvar *) PG_GETARG_POINTER(0);
+    StringInfoData buf;
+
+    pq_begintypsend(&buf);
+    pq_sendfloat8(&buf, gvar->mean);
+    pq_sendfloat8(&buf, gvar->sdev);
+    PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*****************************************************************************
+ * New Operators
+ *
+ * A practical Gvar datatype would provide much more than this, of course.
+ *****************************************************************************/
+
+PG_FUNCTION_INFO_V1(gvar_add);
+/* gvar + gvar: the means add; the errors combine in quadrature, sqrt(sa^2 + sb^2). */
+Datum
+gvar_add(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+    Gvar       *result;
+
+    result = (Gvar *) palloc(sizeof(Gvar));
+    result->mean = a->mean + b->mean;
+    result->sdev = hypot(a->sdev, b->sdev);     /* no overflow/underflow in the squares */
+    PG_RETURN_POINTER(result);
+}
+
+PG_FUNCTION_INFO_V1(gvar_subtract);
+/* gvar - gvar: the means subtract; the errors still combine in quadrature. */
+Datum
+gvar_subtract(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+    Gvar       *result;
+
+    result = (Gvar *) palloc(sizeof(Gvar));
+    result->mean = a->mean - b->mean;
+    result->sdev = hypot(a->sdev, b->sdev);     /* no overflow/underflow in the squares */
+    PG_RETURN_POINTER(result);
+}
+
+PG_FUNCTION_INFO_V1(gvar_multiply);
+/* gvar * gvar: the means multiply; the error follows first-order propagation. */
+Datum
+gvar_multiply(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+    Gvar       *result;
+
+    result = (Gvar *) palloc(sizeof(Gvar));
+    result->mean = a->mean * b->mean;
+    /* No covariance term, i.e. a and b are treated as uncorrelated:
+     *   sdev^2 = (b.mean * a.sdev)^2 + (a.mean * b.sdev)^2
+     */
+    result->sdev = hypot(b->mean * a->sdev, a->mean * b->sdev);
+    PG_RETURN_POINTER(result);
+}
+
+PG_FUNCTION_INFO_V1(gvar_divide);
+/* gvar / gvar: sdev = sqrt(sa^2/b^2 + a^2*sb^2/b^4); a divisor whose mean is zero is an error. */
+Datum
+gvar_divide(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+    Gvar       *result;
+
+    if (b->mean == 0.0)
+        ereport(ERROR,
+                (errcode(ERRCODE_DIVISION_BY_ZERO),
+                 errmsg("division by zero")));
+    result = (Gvar *) palloc(sizeof(Gvar));
+    result->mean = a->mean / b->mean;
+    result->sdev = hypot(a->sdev, result->mean * b->sdev) / fabs(b->mean);  /* same formula, b^4 never formed */
+    PG_RETURN_POINTER(result);
+}
+
+/*****************************************************************************
+ * Operator class for defining B-tree index
+ *
+ * It's essential that the comparison operators and support function for a
+ * B-tree index opclass always agree on the relative ordering of any two
+ * data values.
Experience has shown that it's depressingly easy to write
+ * unintentionally inconsistent functions. One way to reduce the odds of
+ * making a mistake is to make all the functions simple wrappers around
+ * an internal three-way-comparison function, as we do here.
+ *
+ * Ordering operators for gvars will simply use the mean, ignoring the error.
+ *****************************************************************************/
+/* Three-way comparison of the means (-1, 0, 1).  A NaN mean sorts above every other
+ * value and equal to another NaN; plain < and > alone would make NaN "equal" to everything. */
+static int
+gvar_mean_cmp_internal(Gvar * a, Gvar * b)
+{
+    double      amean = a->mean,
+                bmean = b->mean;
+
+    if (isnan(amean) || isnan(bmean))
+        return (isnan(amean) ? 1 : 0) - (isnan(bmean) ? 1 : 0);
+    if (amean < bmean)
+        return -1;
+    return (amean > bmean) ? 1 : 0;
+}
+
+
+PG_FUNCTION_INFO_V1(gvar_mean_lt);
+/* a < b, judged on the means only. */
+Datum
+gvar_mean_lt(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+
+    PG_RETURN_BOOL(gvar_mean_cmp_internal(a, b) < 0);
+}
+
+PG_FUNCTION_INFO_V1(gvar_mean_le);
+/* a <= b, judged on the means only. */
+Datum
+gvar_mean_le(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+
+    PG_RETURN_BOOL(gvar_mean_cmp_internal(a, b) <= 0);
+}
+
+PG_FUNCTION_INFO_V1(gvar_mean_eq);
+/* a = b, judged on the means only: gvars with equal means but different sdev compare equal. */
+Datum
+gvar_mean_eq(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+
+    PG_RETURN_BOOL(gvar_mean_cmp_internal(a, b) == 0);
+}
+
+PG_FUNCTION_INFO_V1(gvar_mean_ge);
+/* a >= b, judged on the means only. */
+Datum
+gvar_mean_ge(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+
+    PG_RETURN_BOOL(gvar_mean_cmp_internal(a, b) >= 0);
+}
+
+PG_FUNCTION_INFO_V1(gvar_mean_gt);
+/* a > b, judged on the means only. */
+Datum
+gvar_mean_gt(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+
+    PG_RETURN_BOOL(gvar_mean_cmp_internal(a, b) > 0);
+}
+
+PG_FUNCTION_INFO_V1(gvar_mean_cmp);
+/* SQL-callable three-way comparison: returns the internal comparison result as an int32. */
+Datum
+gvar_mean_cmp(PG_FUNCTION_ARGS)
+{
+    Gvar       *a = (Gvar *) PG_GETARG_POINTER(0);
+    Gvar       *b = (Gvar *) PG_GETARG_POINTER(1);
+
+    PG_RETURN_INT32(gvar_mean_cmp_internal(a, b));
+}
\ No newline at end of
file diff --git a/local_db/postgres_gvar/gvar.source b/local_db/postgres_gvar/gvar.source new file mode 100644 index 0000000..a88d0e3 --- /dev/null +++ b/local_db/postgres_gvar/gvar.source @@ -0,0 +1,218 @@ +--------------------------------------------------------------------------- +-- Author: William Jay (wjay@fnal.gov) +-- Adapted from +-- https://github.com/postgres/postgres/blob/master/src/tutorial/complex.source +-- gvar.sql- +-- This file shows how to create a new user-defined type and how to +-- use this new type. +-- +-- +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- Portions Copyright (c) 1994, Regents of the University of California +-- +--------------------------------------------------------------------------- +----------------------------- +-- Creating a new type: +-- We are going to create a new type called 'gvar' which represents +-- gaussian random variables for modelling "measurements with errors". +-- A user-defined type must have an input and an output function, and +-- optionally can have binary input and output functions. All of these +-- are usually user-defined C functions. +----------------------------- +-- Assume the user defined functions are in _OBJWD_/gvar$DLSUFFIX +-- (we do not want to assume this is in the dynamic loader search path). +-- Look at $PWD/gvar.c for the source. Note that we declare all of +-- them as STRICT, so we do not need to cope with NULL inputs in the +-- C code. We also mark them IMMUTABLE, since they always return the +-- same outputs given the same inputs. +-- the input function 'gvar_in' takes a null-terminated string (the +-- textual representation of the type) and turns it into the internal +-- (in memory) representation. You will get a message telling you 'gvar' +-- does not exist yet but that's okay. 
+CREATE FUNCTION gvar_in(cstring)
+   RETURNS gvar
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+-- the output function 'gvar_out' takes the internal representation and
+-- converts it into the textual representation.
+CREATE FUNCTION gvar_out(gvar)
+   RETURNS cstring
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+-- the binary input function 'gvar_recv' takes a StringInfo buffer
+-- and turns its contents into the internal representation.
+CREATE FUNCTION gvar_recv(internal)
+   RETURNS gvar
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+-- the binary output function 'gvar_send' takes the internal representation
+-- and converts it into a (hopefully) platform-independent bytea string.
+CREATE FUNCTION gvar_send(gvar)
+   RETURNS bytea
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+-- now, we can create the type. The internallength specifies the size of the
+-- memory block required to hold the type (two 8-byte doubles: mean and sdev).
+CREATE TYPE gvar (
+   internallength = 16,
+   input = gvar_in,
+   output = gvar_out,
+   receive = gvar_recv,
+   send = gvar_send,
+   alignment = double
+);
+-----------------------------
+-- Using the new type:
+--  user-defined types can be used like ordinary built-in types.
+-----------------------------
+-- e.g. we can use it in a table with two gvar columns
+CREATE TABLE test_gvar (
+   a   gvar,
+   b   gvar
+);
+-- data for user-defined types are just strings in the proper textual
+-- representation, here '(mean, sdev)'; spaces around the numbers are allowed.
+INSERT INTO test_gvar VALUES ('(1.0, 2.5)', '(4.2, 3.55 )');
+INSERT INTO test_gvar VALUES ('(33.0, 51.4)', '(100.42, 93.55)');
+SELECT * FROM test_gvar;
+-----------------------------
+-- Creating an operator for the new type:
+--  Let's define arithmetic operators for gvar types. Since POSTGRES
+--  supports function overloading, we'll reuse +, -, * and / as their names.
+--  (Operator names can be reused with different numbers and types of
+--  arguments.)
+-----------------------------
+-- first, define the arithmetic functions gvar_add, gvar_subtract, gvar_multiply,
+-- and gvar_divide (all implemented in gvar.c)
+CREATE FUNCTION gvar_add(gvar, gvar)
+   RETURNS gvar
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+CREATE FUNCTION gvar_subtract(gvar, gvar)
+   RETURNS gvar
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+CREATE FUNCTION gvar_multiply(gvar, gvar)
+   RETURNS gvar
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+CREATE FUNCTION gvar_divide(gvar, gvar)
+   RETURNS gvar
+   AS '_OBJWD_/gvar'
+   LANGUAGE C IMMUTABLE STRICT;
+
+-- we can now define the operators. We show binary operators here but you
+-- can also define unary operators by omitting either of leftarg or rightarg.
+CREATE OPERATOR + (
+   leftarg = gvar,
+   rightarg = gvar,
+   procedure = gvar_add,
+   commutator = +
+);
+CREATE OPERATOR - (
+   leftarg = gvar,
+   rightarg = gvar,
+   procedure = gvar_subtract
+);
+CREATE OPERATOR * (
+   leftarg = gvar,
+   rightarg = gvar,
+   procedure = gvar_multiply,
+   commutator = *
+);
+CREATE OPERATOR / (
+   leftarg = gvar,
+   rightarg = gvar,
+   procedure = gvar_divide
+);
+SELECT (a + b) AS c FROM test_gvar;
+SELECT (a - b) AS c FROM test_gvar;
+SELECT (a * b) AS c FROM test_gvar;
+SELECT (a / b) AS c FROM test_gvar;
+-- Occasionally, you may find it useful to cast the string to the desired
+-- type explicitly. :: denotes a type cast.
+SELECT a + '(1.0,1.0)'::gvar AS aa,
+       b + '(1.0,1.0)'::gvar AS bb
+   FROM test_gvar;
+-----------------------------
+-- Creating aggregate functions
+--  you can also define aggregate functions. The syntax is somewhat
+--  cryptic but the idea is to express the aggregate in terms of state
+--  transition functions.
+-----------------------------
+CREATE AGGREGATE gvar_sum (
+   sfunc = gvar_add,       -- state transition: running total + next value
+   basetype = gvar,
+   stype = gvar,
+   initcond = '(0,0)'      -- initial state: mean 0, sdev 0
+);
+SELECT gvar_sum(a) FROM test_gvar;
+-----------------------------
+-- Interfacing New Types with Indexes:
+--  We cannot define a secondary index (e.g. a B-tree) over the new type
+--  yet. We need to create all the required operators and support
+--  functions, then we can make the operator class.
+-----------------------------
+-- first, define the required operators; in gvar.c each compares the means only and ignores sdev
+CREATE FUNCTION gvar_mean_lt(gvar, gvar) RETURNS bool
+   AS '_OBJWD_/gvar' LANGUAGE C IMMUTABLE STRICT;
+CREATE FUNCTION gvar_mean_le(gvar, gvar) RETURNS bool
+   AS '_OBJWD_/gvar' LANGUAGE C IMMUTABLE STRICT;
+CREATE FUNCTION gvar_mean_eq(gvar, gvar) RETURNS bool
+   AS '_OBJWD_/gvar' LANGUAGE C IMMUTABLE STRICT;
+CREATE FUNCTION gvar_mean_ge(gvar, gvar) RETURNS bool
+   AS '_OBJWD_/gvar' LANGUAGE C IMMUTABLE STRICT;
+CREATE FUNCTION gvar_mean_gt(gvar, gvar) RETURNS bool
+   AS '_OBJWD_/gvar' LANGUAGE C IMMUTABLE STRICT;
+CREATE OPERATOR < (
+   leftarg = gvar, rightarg = gvar, procedure = gvar_mean_lt,
+   commutator = > , negator = >= ,
+   restrict = scalarltsel, join = scalarltjoinsel
+);
+CREATE OPERATOR <= (
+   leftarg = gvar, rightarg = gvar, procedure = gvar_mean_le,
+   commutator = >= , negator = > ,
+   restrict = scalarlesel, join = scalarlejoinsel
+);
+CREATE OPERATOR = (
+   leftarg = gvar, rightarg = gvar, procedure = gvar_mean_eq,
+   commutator = = ,
+   -- leave out negator since we didn't create a <> operator
+   -- negator = <> ,
+   restrict = eqsel, join = eqjoinsel
+);
+CREATE OPERATOR >= (
+   leftarg = gvar, rightarg = gvar, procedure = gvar_mean_ge,
+   commutator = <= , negator = < ,
+   restrict = scalargesel, join = scalargejoinsel
+);
+CREATE OPERATOR > (
+   leftarg = gvar, rightarg = gvar, procedure = gvar_mean_gt,
+   commutator = < , negator = <= ,
+   restrict = scalargtsel, join = scalargtjoinsel
+);
+-- create the three-way comparison support function too
+CREATE FUNCTION gvar_mean_cmp(gvar,
gvar) RETURNS int4
+   AS '_OBJWD_/gvar' LANGUAGE C IMMUTABLE STRICT;
+-- now we can make the operator class (strategies 1-5 plus comparison support function 1)
+CREATE OPERATOR CLASS gvar_mean_ops
+    DEFAULT FOR TYPE gvar USING btree AS
+        OPERATOR        1       < ,
+        OPERATOR        2       <= ,
+        OPERATOR        3       = ,
+        OPERATOR        4       >= ,
+        OPERATOR        5       > ,
+        FUNCTION        1       gvar_mean_cmp(gvar, gvar);
+-- now, we can define a btree index on gvar types. First, let's populate
+-- the table (sdev is a standard deviation, so keep it non-negative). Note that
+-- postgres needs many more tuples to start using the btree index during selects.
+INSERT INTO test_gvar VALUES ('(56.0,22.5)', '(-43.2,0.07)');
+INSERT INTO test_gvar VALUES ('(-91.9,33.6)', '(8.6,3.0)');
+CREATE INDEX test_gvar_ind ON test_gvar
+   USING btree(a gvar_mean_ops);
+SELECT * from test_gvar where a = '(56.0,22.5)';
+SELECT * from test_gvar where a < '(56.0,22.5)';
+SELECT * from test_gvar where a > '(56.0,22.5)';
+-- clean up the example
+DROP TABLE test_gvar;
+DROP TYPE gvar CASCADE;
\ No newline at end of file