@@ -15,6 +15,7 @@ import (
1515 "go.temporal.io/sdk/log"
1616
1717 "github.com/PeerDB-io/peerdb/flow/generated/protos"
18+ "github.com/PeerDB-io/peerdb/flow/internal"
1819 "github.com/PeerDB-io/peerdb/flow/model"
1920 "github.com/PeerDB-io/peerdb/flow/shared"
2021 "github.com/PeerDB-io/peerdb/flow/shared/datatypes"
@@ -24,19 +25,20 @@ import (
2425type QRepQueryExecutor struct {
2526 * PostgresConnector
2627 logger log.Logger
28+ env map [string ]string
2729 snapshot string
2830 flowJobName string
2931 partitionID string
3032 version uint32
3133}
3234
33- func (c * PostgresConnector ) NewQRepQueryExecutor (ctx context.Context , version uint32 ,
35+ func (c * PostgresConnector ) NewQRepQueryExecutor (ctx context.Context , env map [ string ] string , version uint32 ,
3436 flowJobName string , partitionID string ,
3537) (* QRepQueryExecutor , error ) {
36- return c .NewQRepQueryExecutorSnapshot (ctx , version , "" , flowJobName , partitionID )
38+ return c .NewQRepQueryExecutorSnapshot (ctx , env , version , "" , flowJobName , partitionID )
3739}
3840
39- func (c * PostgresConnector ) NewQRepQueryExecutorSnapshot (ctx context.Context , version uint32 ,
41+ func (c * PostgresConnector ) NewQRepQueryExecutorSnapshot (ctx context.Context , env map [ string ] string , version uint32 ,
4042 snapshot string , flowJobName string , partitionID string ,
4143) (* QRepQueryExecutor , error ) {
4244 if _ , err := c .fetchCustomTypeMapping (ctx ); err != nil {
@@ -45,6 +47,7 @@ func (c *PostgresConnector) NewQRepQueryExecutorSnapshot(ctx context.Context, ve
4547 }
4648 return & QRepQueryExecutor {
4749 PostgresConnector : c ,
50+ env : env ,
4851 snapshot : snapshot ,
4952 flowJobName : flowJobName ,
5053 partitionID : partitionID ,
@@ -67,63 +70,209 @@ func (qe *QRepQueryExecutor) cursorToSchema(
6770 ctx context.Context ,
6871 tx pgx.Tx ,
6972 cursorName string ,
70- ) (types.QRecordSchema , error ) {
71- type attId struct {
72- relid uint32
73- num uint16
73+ ) (types.QRecordSchema , * types. NullableSchemaDebug , error ) {
74+ laxMode , err := internal . PeerDBAvroNullableLax ( ctx , qe . env )
75+ if err != nil {
76+ return types. QRecordSchema {}, nil , err
7477 }
7578
7679 rows , err := tx .Query (ctx , "FETCH 0 FROM " + cursorName )
7780 if err != nil {
78- return types.QRecordSchema {}, fmt .Errorf ("failed to fetch 0 for field descriptions: %w" , err )
81+ return types.QRecordSchema {}, nil , fmt .Errorf ("failed to fetch 0 for field descriptions: %w" , err )
7982 }
8083 fds := rows .FieldDescriptions ()
84+ rows .Close ()
85+
8186 tableOIDset := make (map [uint32 ]struct {})
82- nullPointers := make (map [attId ]* bool , len (fds ))
8387 qfields := make ([]types.QField , len (fds ))
88+
89+ // In lax mode: track debug info and map attIds to field indices
90+ // In strict mode: track pointers to nullable fields
91+ var schemaDebug * types.NullableSchemaDebug
92+ var attIdToFieldIdx map [attId ][]int // lax mode
93+ var nullPointers map [attId ]* bool // strict mode
94+
95+ if laxMode {
96+ schemaDebug = & types.NullableSchemaDebug {
97+ PgxFields : make ([]types.PgxFieldDebug , len (fds )),
98+ StrictNullable : make ([]bool , len (fds )),
99+ }
100+ attIdToFieldIdx = make (map [attId ][]int , len (fds ))
101+ } else {
102+ nullPointers = make (map [attId ]* bool , len (fds ))
103+ }
104+
84105 for i , fd := range fds {
85106 tableOIDset [fd .TableOID ] = struct {}{}
86107 ctype := qe .postgresOIDToQValueKind (fd .DataTypeOID , qe .customTypeMapping , qe .version )
108+
87109 if ctype == types .QValueKindNumeric || ctype == types .QValueKindArrayNumeric {
88110 precision , scale := datatypes .ParseNumericTypmod (fd .TypeModifier )
89111 qfields [i ] = types.QField {
90112 Name : fd .Name ,
91113 Type : ctype ,
92- Nullable : false ,
114+ Nullable : laxMode , // lax=true, strict=false (until pg_attribute says otherwise)
93115 Precision : precision ,
94116 Scale : scale ,
95117 }
96118 } else {
97119 qfields [i ] = types.QField {
98120 Name : fd .Name ,
99121 Type : ctype ,
100- Nullable : false ,
122+ Nullable : laxMode ,
123+ }
124+ }
125+
126+ key := attId {relid : fd .TableOID , num : fd .TableAttributeNumber }
127+ if laxMode {
128+ schemaDebug .PgxFields [i ] = types.PgxFieldDebug {
129+ Name : fd .Name ,
130+ TableOID : fd .TableOID ,
131+ TableAttributeNumber : fd .TableAttributeNumber ,
132+ DataTypeOID : fd .DataTypeOID ,
101133 }
134+ attIdToFieldIdx [key ] = append (attIdToFieldIdx [key ], i )
135+ } else {
136+ nullPointers [key ] = & qfields [i ].Nullable
102137 }
103- nullPointers [attId {
104- relid : fd .TableOID ,
105- num : fd .TableAttributeNumber ,
106- }] = & qfields [i ].Nullable
107138 }
108- rows . Close ()
139+
109140 tableOIDs := slices .Collect (maps .Keys (tableOIDset ))
141+ if laxMode {
142+ schemaDebug .QueriedTableOIDs = tableOIDs
143+ }
144+
145+ // Query pg_attribute - different queries for lax vs strict
146+ if laxMode {
147+ if err := qe .populateLaxModeDebugInfo (ctx , tx , tableOIDs , schemaDebug , attIdToFieldIdx ); err != nil {
148+ return types.QRecordSchema {}, nil , err
149+ }
150+ } else {
151+ // Strict mode: minimal query, just need nullable columns
152+ rows , err := tx .Query (ctx ,
153+ "SELECT a.attrelid, a.attnum FROM pg_attribute a WHERE a.attrelid = ANY($1) AND NOT a.attnotnull" ,
154+ tableOIDs )
155+ if err != nil {
156+ return types.QRecordSchema {}, nil , fmt .Errorf ("failed to query pg_attribute: %w" , err )
157+ }
158+
159+ var att attId
160+ if _ , err := pgx .ForEachRow (rows , []any {& att .relid , & att .num }, func () error {
161+ if nullPointer , ok := nullPointers [att ]; ok {
162+ * nullPointer = true
163+ }
164+ return nil
165+ }); err != nil {
166+ return types.QRecordSchema {}, nil , fmt .Errorf ("failed to process pg_attribute: %w" , err )
167+ }
168+ }
110169
111- rows , err = tx .Query (ctx , "SELECT a.attrelid,a.attnum FROM pg_attribute a WHERE a.attrelid = ANY($1) AND NOT a.attnotnull" , tableOIDs )
170+ return types .NewQRecordSchema (qfields ), schemaDebug , nil
171+ }
172+
// attId identifies a single table column by the OID of its relation and its
// attribute number within that relation. It is comparable, so it can be used
// directly as a map key.
type attId struct {
	// relid is the OID of the relation the column belongs to.
	relid uint32
	// num is the column's attribute number within that relation.
	num uint16
}
177+
178+ // populateLaxModeDebugInfo populates debug info for diagnosing nullable mismatches.
179+ // The aim is to capture enough data that the customer can change the schema in any way
180+ // after the snapshot transaction is done and we still have a way to debug.
181+ func (qe * QRepQueryExecutor ) populateLaxModeDebugInfo (
182+ ctx context.Context ,
183+ tx pgx.Tx ,
184+ tableOIDs []uint32 ,
185+ schemaDebug * types.NullableSchemaDebug ,
186+ attIdToFieldIdx map [attId ][]int ,
187+ ) error {
188+ // First, expand tableOIDs to include all parent tables (for full column info)
189+ allTableOIDs := make (map [uint32 ]struct {})
190+ for _ , oid := range tableOIDs {
191+ allTableOIDs [oid ] = struct {}{}
192+ }
193+ parentOIDByTableOID := make (map [uint32 ]uint32 )
194+
195+ // Iteratively find all parent tables
196+ oidsToQuery := tableOIDs
197+ for len (oidsToQuery ) > 0 {
198+ rows , err := tx .Query (ctx , `SELECT inhrelid, inhparent FROM pg_inherits WHERE inhrelid = ANY($1)` , oidsToQuery )
199+ if err != nil {
200+ return fmt .Errorf ("failed to query pg_inherits: %w" , err )
201+ }
202+ var childOID , parentOID uint32
203+ var nextOids []uint32
204+ if _ , err := pgx .ForEachRow (rows , []any {& childOID , & parentOID }, func () error {
205+ parentOIDByTableOID [childOID ] = parentOID
206+ if _ , seen := allTableOIDs [parentOID ]; ! seen {
207+ allTableOIDs [parentOID ] = struct {}{}
208+ nextOids = append (nextOids , parentOID )
209+ }
210+ return nil
211+ }); err != nil {
212+ return fmt .Errorf ("failed to process pg_inherits: %w" , err )
213+ }
214+ oidsToQuery = nextOids
215+ }
216+
217+ allOIDSlice := slices .Collect (maps .Keys (allTableOIDs ))
218+
219+ // Query pg_attribute for ALL tables (children + parents)
220+ rows , err := tx .Query (ctx , `
221+ SELECT a.attrelid, a.attnum, a.attname, a.attnotnull, a.atttypid, a.attinhcount, a.attislocal
222+ FROM pg_attribute a
223+ WHERE a.attrelid = ANY($1) AND a.attnum > 0 AND NOT a.attisdropped
224+ ORDER BY a.attrelid, a.attnum` ,
225+ allOIDSlice )
112226 if err != nil {
113- return types. QRecordSchema {}, fmt .Errorf ("failed to query schema for field descriptions : %w" , err )
227+ return fmt .Errorf ("failed to query pg_attribute : %w" , err )
114228 }
115229
116- var att attId
117- if _ , err := pgx .ForEachRow (rows , []any {& att .relid , & att .num }, func () error {
118- if nullPointer , ok := nullPointers [att ]; ok {
119- * nullPointer = true
230+ var row types.PgAttributeDebug
231+ if _ , err := pgx .ForEachRow (rows , []any {
232+ & row .AttRelID , & row .AttNum , & row .AttName , & row .AttNotNull , & row .AttTypID , & row .AttInhCount , & row .AttIsLocal ,
233+ }, func () error {
234+ schemaDebug .PgAttributeRows = append (schemaDebug .PgAttributeRows , row )
235+
236+ // Compute strict nullable: if NOT attnotnull and matches a field, mark it nullable
237+ if ! row .AttNotNull {
238+ key := attId {relid : row .AttRelID , num : uint16 (row .AttNum )}
239+ if indices , ok := attIdToFieldIdx [key ]; ok {
240+ for _ , idx := range indices {
241+ schemaDebug .StrictNullable [idx ] = true
242+ }
243+ }
120244 }
121245 return nil
122246 }); err != nil {
123- return types.QRecordSchema {}, fmt .Errorf ("failed to process schema for field descriptions: %w" , err )
247+ return fmt .Errorf ("failed to process pg_attribute: %w" , err )
248+ }
249+
250+ // Query table names and schemas for all tables
251+ rows , err = tx .Query (ctx , `
252+ SELECT c.oid, c.relname, n.nspname
253+ FROM pg_class c
254+ JOIN pg_namespace n ON c.relnamespace = n.oid
255+ WHERE c.oid = ANY($1)` ,
256+ allOIDSlice )
257+ if err != nil {
258+ return fmt .Errorf ("failed to query pg_class: %w" , err )
259+ }
260+
261+ var oid uint32
262+ var tableName , schemaName string
263+ if _ , err := pgx .ForEachRow (rows , []any {& oid , & tableName , & schemaName }, func () error {
264+ schemaDebug .Tables = append (schemaDebug .Tables , types.TableDebug {
265+ OID : oid ,
266+ TableName : tableName ,
267+ SchemaName : schemaName ,
268+ ParentOID : parentOIDByTableOID [oid ],
269+ })
270+ return nil
271+ }); err != nil {
272+ return fmt .Errorf ("failed to process pg_class: %w" , err )
124273 }
125274
126- return types . NewQRecordSchema ( qfields ), nil
275+ return nil
127276}
128277
129278func (qe * QRepQueryExecutor ) processRowsStream (
0 commit comments