From be055d7dfe30132cbd6bbb86ce3472ef5dbea44f Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Tue, 10 Oct 2023 09:06:45 -0500 Subject: [PATCH 01/13] Saving work --- cmd/internal/schema_builder.go | 31 +++ lib/connect_client.go | 341 +++++++++++++++++++++++++++++++++ lib/connect_client_test.go | 298 ++++++++++++++++++++++++++++ lib/mysql_client.go | 200 +++++++++++++++++++ lib/planetscale_source.go | 96 ++++++++++ lib/test_types.go | 127 ++++++++++++ lib/types.go | 78 ++++++++ 7 files changed, 1171 insertions(+) create mode 100644 cmd/internal/schema_builder.go create mode 100644 lib/connect_client.go create mode 100644 lib/connect_client_test.go create mode 100644 lib/mysql_client.go create mode 100644 lib/planetscale_source.go create mode 100644 lib/test_types.go create mode 100644 lib/types.go diff --git a/cmd/internal/schema_builder.go b/cmd/internal/schema_builder.go new file mode 100644 index 0000000..feea265 --- /dev/null +++ b/cmd/internal/schema_builder.go @@ -0,0 +1,31 @@ +package internal + +import "github.com/planetscale/airbyte-source/lib" + +type schemaBuilder struct { + treatTinyIntAsBoolean bool + catalog *Catalog +} + +func NewSchemaBuilder(treatTinyIntAsBoolean bool) lib.SchemaBuilder { + return &schemaBuilder{ + treatTinyIntAsBoolean: treatTinyIntAsBoolean, + } +} + +func (sb *schemaBuilder) OnKeyspace(keyspaceName string) { + if sb.catalog == nil { + sb.catalog = &Catalog{} + } + panic("implement me") +} + +func (schemaBuilder) OnTable(keyspaceName, tableName string) { + //TODO implement me + panic("implement me") +} + +func (schemaBuilder) OnColumns(keyspaceName, tableName string, columns []lib.MysqlColumn) { + //TODO implement me + panic("implement me") +} diff --git a/lib/connect_client.go b/lib/connect_client.go new file mode 100644 index 0000000..a2fa127 --- /dev/null +++ b/lib/connect_client.go @@ -0,0 +1,341 @@ +package lib + +import ( + "context" + "fmt" + "io" + "net/http" + "strings" + "time" + + "vitess.io/vitess/go/vt/proto/query" + + 
"github.com/pkg/errors" + psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "github.com/planetscale/psdb/auth" + grpcclient "github.com/planetscale/psdb/core/pool" + clientoptions "github.com/planetscale/psdb/core/pool/options" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "vitess.io/vitess/go/sqltypes" + + _ "vitess.io/vitess/go/vt/vtctl/grpcvtctlclient" + _ "vitess.io/vitess/go/vt/vtgate/grpcvtgateconn" +) + +type ( + OnResult func(*sqltypes.Result, Operation) error + OnUpdate func(*UpdatedRow) error + OnCursor func(*psdbconnect.TableCursor) error +) + +type DatabaseLogger interface { + Info(string) +} + +// ConnectClient is a general purpose interface +// that defines all the data access methods needed for the PlanetScale Fivetran source to function. +type ConnectClient interface { + CanConnect(ctx context.Context, ps PlanetScaleSource) error + Read(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, lastKnownPosition *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) + ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) +} + +func NewConnectClient(mysqlAccess *MysqlClient) ConnectClient { + return &connectClient{ + Mysql: mysqlAccess, + } +} + +// connectClient is an implementation of the ConnectClient interface defined above. +// It uses the mysql interface provided by PlanetScale for all schema/shard/tablet discovery and +// the grpc API for incrementally syncing rows from PlanetScale. 
+type connectClient struct {
+	// clientFn, when set, supplies the psdbconnect client instead of dialing
+	// ps.Host over gRPC; the tests in this package use it to inject a mock.
+	clientFn func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error)
+	// Mysql points at the MysqlClient used for shard discovery and pings.
+	Mysql *MysqlClient
+}
+
+// mysqlAccess returns the MysqlClient behind p.Mysql, or an Internal status
+// error when either the pointer or the interface value it points at is nil.
+func (p connectClient) mysqlAccess() (MysqlClient, error) {
+	if p.Mysql == nil || *p.Mysql == nil {
+		return nil, status.Error(codes.Internal, "Mysql access is uninitialized")
+	}
+	return *p.Mysql, nil
+}
+
+// ListShards returns the shard names the MysqlClient reports for ps.
+// It fails with an Internal status error when no MysqlClient is configured.
+func (p connectClient) ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) {
+	db, err := p.mysqlAccess()
+	if err != nil {
+		return nil, err
+	}
+	return db.GetVitessShards(ctx, ps)
+}
+
+// CanConnect reports whether the source described by ps is usable: a
+// MysqlClient must be configured, the host must pass checkEdgePassword, and
+// the MysqlClient must answer a ping. The first failing step's error is returned.
+func (p connectClient) CanConnect(ctx context.Context, ps PlanetScaleSource) error {
+	db, err := p.mysqlAccess()
+	if err != nil {
+		return err
+	}
+
+	if err := p.checkEdgePassword(ctx, ps); err != nil {
+		return errors.Wrap(err, "Unable to initialize Connect Session")
+	}
+
+	return db.PingContext(ctx, ps)
+}
+
+// checkEdgePassword verifies that psc.Host ends in ".connect.psdb.cloud" and
+// that an HTTPS GET against it completes within two seconds. Only
+// reachability is checked; the response status code is not inspected.
+func (p connectClient) checkEdgePassword(ctx context.Context, psc PlanetScaleSource) error {
+	if !strings.HasSuffix(psc.Host, ".connect.psdb.cloud") {
+		return errors.New("This password is not connect-enabled, please ensure that your organization is enrolled in the Connect beta.")
+	}
+	reqCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
+	defer cancel()
+	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, fmt.Sprintf("https://%v", psc.Host), nil)
+	if err != nil {
+		return err
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return errors.Errorf("The database %q, hosted at %q, is inaccessible from this process", psc.Database, psc.Host)
+	}
+	// The body is never read, but it still has to be closed so the underlying
+	// connection is released; a close error carries no useful information here.
+	_ = resp.Body.Close()
+
+	return nil
+}
+
+// Read streams rows from a table given a starting cursor.
+// 1. We will get the latest vgtid for a given table in a shard when a sync session starts.
+// 2. This latest vgtid is now the stopping point for this sync session.
+// 3. Ask vstream to stream from the last known vgtid
+// 4. When we reach the stopping point, read all rows available at this vgtid
+// 5. End the stream when (a) a vgtid newer than latest vgtid is encountered or (b) the timeout kicks in.
+func (p connectClient) Read(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, lastKnownPosition *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) {
+	// Every step below dereferences the cursor, so reject a missing one up
+	// front instead of panicking.
+	if lastKnownPosition == nil {
+		return nil, errors.New("unable to read without a last known position")
+	}
+
+	var (
+		err                     error
+		sErr                    error
+		currentSerializedCursor *SerializedCursor
+	)
+
+	tabletType := psdbconnect.TabletType_primary
+	currentPosition := lastKnownPosition
+	readDuration := 1 * time.Minute
+	preamble := fmt.Sprintf("[%v:%v shard : %v] ", ps.Database, tableName, currentPosition.Shard)
+	for {
+		logger.Info(preamble + "peeking to see if there's any new rows")
+		latestCursorPosition, lcErr := p.getLatestCursorPosition(ctx, currentPosition.Shard, currentPosition.Keyspace, tableName, ps, tabletType)
+		if lcErr != nil {
+			// Wrap lcErr, not err: errors.Wrap returns nil for a nil error, which
+			// would report a failed peek as success.
+			return currentSerializedCursor, errors.Wrap(lcErr, "Unable to get latest cursor position")
+		}
+
+		// the current vgtid is the same as the last synced vgtid, no new rows.
+		if latestCursorPosition == currentPosition.Position {
+			logger.Info(preamble + "no new rows found, exiting")
+			return TableCursorToSerializedCursor(currentPosition)
+		}
+		logger.Info(fmt.Sprintf("new rows found, syncing rows for %v", readDuration))
+		// preamble holds database/table names, so keep it out of the format string.
+		logger.Info(preamble + fmt.Sprintf("syncing rows with cursor [%v]", currentPosition))
+
+		currentPosition, err = p.sync(ctx, logger, tableName, columns, currentPosition, latestCursorPosition, ps, tabletType, readDuration, onResult, onCursor, onUpdate)
+		if currentPosition.Position != "" {
+			currentSerializedCursor, sErr = TableCursorToSerializedCursor(currentPosition)
+			if sErr != nil {
+				// if we failed to serialize here, we should bail.
+				return currentSerializedCursor, errors.Wrap(sErr, "unable to serialize current position")
+			}
+		}
+		if err != nil {
+			if s, ok := status.FromError(err); ok {
+				// A gRPC status other than DeadlineExceeded ends this read; the
+				// cursor reached so far is returned without an error.
+				if s.Code() != codes.DeadlineExceeded {
+					logger.Info(fmt.Sprintf("%v Got error [%v] with message [%q], Returning with cursor :[%v] after server timeout", preamble, s.Code(), err, currentPosition))
+					return currentSerializedCursor, nil
+				}
+				// DeadlineExceeded: loop again and resume from currentPosition.
+				logger.Info(preamble + "Continuing with cursor after server timeout")
+			} else if errors.Is(err, io.EOF) {
+				logger.Info(fmt.Sprintf("%vFinished reading all rows for table [%v]", preamble, tableName))
+				return currentSerializedCursor, nil
+			} else {
+				logger.Info(fmt.Sprintf("non-grpc error [%v]]", err))
+				return currentSerializedCursor, err
+			}
+		}
+	}
+}
+
+// sync opens one Sync stream starting at tc and forwards every received
+// insert, delete and update to the callbacks until the stream fails, the
+// readDuration timeout expires, or a cursor newer than stopPosition arrives
+// after stopPosition has been seen (reported as io.EOF).
+//
+// The returned cursor is the most recent one received, or tc itself when none
+// arrived; it is never nil as long as tc is non-nil. The returned error is
+// always non-nil. Note that tc.Position is cleared in place when
+// tc.LastKnownPk is set.
+func (p connectClient) sync(ctx context.Context, logger DatabaseLogger, tableName string, columns []string, tc *psdbconnect.TableCursor, stopPosition string, ps PlanetScaleSource, tabletType psdbconnect.TabletType, readDuration time.Duration, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*psdbconnect.TableCursor, error) {
+	ctx, cancel := context.WithTimeout(ctx, readDuration)
+	defer cancel()
+
+	var (
+		err    error
+		client psdbconnect.ConnectClient
+	)
+
+	if p.clientFn == nil {
+		conn, err := grpcclient.Dial(ctx, ps.Host,
+			clientoptions.WithDefaultTLSConfig(),
+			clientoptions.WithCompression(true),
+			clientoptions.WithConnectionPool(1),
+			clientoptions.WithExtraCallOption(
+				auth.NewBasicAuth(ps.Username, ps.Password).CallOption(),
+			),
+		)
+		if err != nil {
+			return tc, err
+		}
+		defer conn.Close()
+		client = psdbconnect.NewConnectClient(conn)
+	} else {
+		client, err = p.clientFn(ctx, ps)
+		if err != nil {
+			return tc, err
+		}
+	}
+
+	if tc.LastKnownPk != nil {
+		tc.Position = ""
+	}
+
+	logger.Info(fmt.Sprintf("Syncing with cursor position : [%v], using last known PK : %v, stop cursor is : [%v]", tc.Position, tc.LastKnownPk != nil, stopPosition))
+
+	sReq := &psdbconnect.SyncRequest{
+		TableName:      tableName,
+		Cursor:         tc,
+		TabletType:     tabletType,
+		Columns:        columns,
+		IncludeUpdates: true,
+		IncludeInserts: true,
+		IncludeDeletes: true,
+	}
+
+	c, err := client.Sync(ctx, sReq)
+	if err != nil {
+		return tc, err
+	}
+
+	// stop when we've reached the well known stop position for this sync session.
+	watchForVgGtidChange := false
+	for {
+		res, err := c.Recv()
+		if err != nil {
+			return tc, err
+		}
+
+		if res.Cursor != nil {
+			tc = res.Cursor
+		}
+
+		// Because of the ordering of events in a vstream
+		// we receive the vgtid event first and then the rows.
+		// the vgtid event might repeat, but they're ordered.
+		// so once we reach the desired stop vgtid, we stop the sync session
+		// if we get a newer vgtid.
+		watchForVgGtidChange = watchForVgGtidChange || tc.Position == stopPosition
+
+		if onResult != nil {
+			for _, insertedRow := range res.Result {
+				qr := sqltypes.Proto3ToResult(insertedRow)
+				for _, row := range qr.Rows {
+					sqlResult := &sqltypes.Result{
+						Fields: insertedRow.Fields,
+					}
+					sqlResult.Rows = append(sqlResult.Rows, row)
+					if err := onResult(sqlResult, OpType_Insert); err != nil {
+						return tc, status.Error(codes.Internal, "unable to serialize row")
+					}
+				}
+			}
+
+			for _, deletedRow := range res.Deletes {
+				qr := sqltypes.Proto3ToResult(deletedRow.Result)
+				for _, row := range qr.Rows {
+					sqlResult := &sqltypes.Result{
+						Fields: deletedRow.Result.Fields,
+					}
+					sqlResult.Rows = append(sqlResult.Rows, row)
+					if err := onResult(sqlResult, OpType_Delete); err != nil {
+						// Return tc, not nil: Read dereferences the returned cursor.
+						return tc, status.Error(codes.Internal, "unable to serialize row")
+					}
+				}
+			}
+		}
+
+		if onUpdate != nil {
+			for _, update := range res.Updates {
+				updatedRow := &UpdatedRow{
+					Before: serializeQueryResult(update.Before),
+					After:  serializeQueryResult(update.After),
+				}
+				if err := onUpdate(updatedRow); err != nil {
+					// Return tc, not nil: Read dereferences the returned cursor.
+					return tc, status.Error(codes.Internal, "unable to serialize update")
+				}
+			}
+		}
+
+		if watchForVgGtidChange && tc.Position != stopPosition {
+			// onCursor is optional, like onResult and onUpdate.
+			if onCursor != nil {
+				if err := onCursor(tc); err != nil {
+					return tc, status.Error(codes.Internal, "unable to serialize cursor")
+				}
+			}
+			return tc, io.EOF
+		}
+	}
+}
+
+// serializeQueryResult converts result into a sqltypes.Result holding a single
+// row. When result carries several rows only the last one is kept; it returns
+// nil when result is nil or has no rows.
+// NOTE(review): presumably each side of an update carries exactly one row - confirm.
+func serializeQueryResult(result *query.QueryResult) *sqltypes.Result {
+	// Proto3ToResult hands back nil for a nil input, so stop before touching qr.Rows.
+	if result == nil {
+		return nil
+	}
+
+	qr := sqltypes.Proto3ToResult(result)
+	var sqlResult *sqltypes.Result
+	for _, row := range qr.Rows {
+		sqlResult = &sqltypes.Result{
+			Fields: result.Fields,
+		}
+		sqlResult.Rows = append(sqlResult.Rows, row)
+	}
+	return sqlResult
+}
+
+// getLatestCursorPosition asks the Sync API for the "current" position of the
+// given shard and keyspace and returns the position of the first cursor the
+// stream delivers. The whole exchange is bounded by a 45 second timeout; any
+// failure to open the stream or to receive from it is returned to the caller.
+func (p connectClient) getLatestCursorPosition(ctx context.Context, shard, keyspace string, tableName string, ps PlanetScaleSource, tabletType psdbconnect.TabletType) (string, error) {
+	timeout := 45 * time.Second
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+	var (
+		err    error
+		client psdbconnect.ConnectClient
+	)
+
+	if p.clientFn == nil {
+		conn, err := grpcclient.Dial(ctx, ps.Host,
+			clientoptions.WithDefaultTLSConfig(),
+			clientoptions.WithCompression(true),
+			clientoptions.WithConnectionPool(1),
+			clientoptions.WithExtraCallOption(
+				auth.NewBasicAuth(ps.Username, ps.Password).CallOption(),
+			),
+		)
+		if err != nil {
+			return "", err
+		}
+		defer conn.Close()
+		client = psdbconnect.NewConnectClient(conn)
+	} else {
+		client, err = p.clientFn(ctx, ps)
+		if err != nil {
+			return "", err
+		}
+	}
+
+	sReq := &psdbconnect.SyncRequest{
+		TableName: tableName,
+		Cursor: &psdbconnect.TableCursor{
+			Shard:    shard,
+			Keyspace: keyspace,
+			Position: "current",
+		},
+		TabletType: tabletType,
+	}
+
+	c, err := client.Sync(ctx, sReq)
+	if err != nil {
+		// Surface the failure; returning a nil error here would make the caller
+		// treat the empty string as a real position.
+		return "", err
+	}
+
+	for {
+		res, err := c.Recv()
+		if err != nil {
+			return "", err
+		}
+
+		if res.Cursor != nil {
+			return res.Cursor.Position, nil
+		}
+	}
+}
diff --git a/lib/connect_client_test.go b/lib/connect_client_test.go
new file mode 100644
index 0000000..090d756
--- /dev/null
+++ b/lib/connect_client_test.go
@@ -0,0 +1,298 @@
+package lib
+
+import (
+	
"context" + "fmt" + "testing" + + "vitess.io/vitess/go/vt/proto/query" + + psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + + "github.com/stretchr/testify/assert" + "google.golang.org/grpc" + + "vitess.io/vitess/go/sqltypes" +) + +func TestRead_CanPeekBeforeRead(t *testing.T) { + dbl := &dbLogger{} + ped := connectClient{} + tc := &psdbconnect.TableCursor{ + Shard: "-", + Position: "THIS_IS_A_SHARD_GTID", + Keyspace: "connect-test", + } + + syncClient := &connectSyncClientMock{ + syncResponses: []*psdbconnect.SyncResponse{ + { + Cursor: tc, + }, + { + Cursor: tc, + }, + }, + } + + cc := clientConnectionMock{ + syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { + assert.Equal(t, "current", in.Cursor.Position) + return syncClient, nil + }, + } + ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { + return &cc, nil + } + ps := PlanetScaleSource{} + onRow := func(*sqltypes.Result, Operation) error { + return nil + } + onCursor := func(*psdbconnect.TableCursor) error { + return nil + } + sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) + assert.NoError(t, err) + esc, err := TableCursorToSerializedCursor(tc) + assert.NoError(t, err) + assert.Equal(t, esc, sc) + assert.Equal(t, 1, cc.syncFnInvokedCount) +} + +func TestRead_CanEarlyExitIfNoNewVGtidInPeek(t *testing.T) { + dbl := &dbLogger{} + ped := connectClient{} + tc := &psdbconnect.TableCursor{ + Shard: "-", + Position: "THIS_IS_A_SHARD_GTID", + Keyspace: "connect-test", + } + + syncClient := &connectSyncClientMock{ + syncResponses: []*psdbconnect.SyncResponse{ + {Cursor: tc}, + }, + } + + cc := clientConnectionMock{ + syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { + assert.Equal(t, "current", in.Cursor.Position) + return syncClient, 
nil + }, + } + ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { + return &cc, nil + } + ps := PlanetScaleSource{} + onRow := func(*sqltypes.Result, Operation) error { + return nil + } + onCursor := func(*psdbconnect.TableCursor) error { + return nil + } + sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) + assert.NoError(t, err) + esc, err := TableCursorToSerializedCursor(tc) + assert.NoError(t, err) + assert.Equal(t, esc, sc, "should return original cursor if no new rows found") + assert.Equal(t, 1, cc.syncFnInvokedCount) + assert.Contains(t, dbl.messages[len(dbl.messages)-1].message, "no new rows found, exiting") +} + +func TestRead_CanPickPrimaryForShardedKeyspaces(t *testing.T) { + dbl := &dbLogger{} + ped := connectClient{} + tc := &psdbconnect.TableCursor{ + Shard: "40-80", + Position: "THIS_IS_A_SHARD_GTID", + Keyspace: "connect-test", + } + + syncClient := &connectSyncClientMock{ + syncResponses: []*psdbconnect.SyncResponse{ + {Cursor: tc}, + }, + } + + cc := clientConnectionMock{ + syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { + assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) + return syncClient, nil + }, + } + ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { + return &cc, nil + } + ps := PlanetScaleSource{ + Database: "connect-test", + } + onRow := func(*sqltypes.Result, Operation) error { + return nil + } + onCursor := func(*psdbconnect.TableCursor) error { + return nil + } + sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) + assert.NoError(t, err) + esc, err := TableCursorToSerializedCursor(tc) + assert.NoError(t, err) + assert.Equal(t, esc, sc) + assert.Equal(t, 1, cc.syncFnInvokedCount) +} + +func TestRead_CanReturnNewCursorIfNewFound(t *testing.T) { + dbl := 
&dbLogger{} + ped := connectClient{} + tc := &psdbconnect.TableCursor{ + Shard: "-", + Position: "THIS_IS_A_SHARD_GTID", + Keyspace: "connect-test", + } + newTC := &psdbconnect.TableCursor{ + Shard: "-", + Position: "I_AM_FARTHER_IN_THE_BINLOG", + Keyspace: "connect-test", + } + + syncClient := &connectSyncClientMock{ + syncResponses: []*psdbconnect.SyncResponse{ + {Cursor: newTC}, + {Cursor: newTC}, + }, + } + + cc := clientConnectionMock{ + syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { + assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) + return syncClient, nil + }, + } + ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { + return &cc, nil + } + ps := PlanetScaleSource{ + Database: "connect-test", + } + onRow := func(*sqltypes.Result, Operation) error { + return nil + } + onCursor := func(*psdbconnect.TableCursor) error { + return nil + } + sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) + assert.NoError(t, err) + esc, err := TableCursorToSerializedCursor(newTC) + assert.NoError(t, err) + assert.Equal(t, esc, sc) + assert.Equal(t, 2, cc.syncFnInvokedCount) +} + +func TestRead_CanStopAtWellKnownCursor(t *testing.T) { + dbl := &dbLogger{} + ped := connectClient{} + + testFields := sqltypes.MakeTestFields( + "pid|description", + "int64|varbinary") + numResponses := 10 + // when the client tries to get the "current" vgtid, + // we return the ante-penultimate element of the array. + currentVGtidPosition := (numResponses * 3) - 4 + // this is the next vgtid that should stop the sync session. 
+ nextVGtidPosition := currentVGtidPosition + 1 + responses := make([]*psdbconnect.SyncResponse, 0, numResponses) + for i := 0; i < numResponses; i++ { + // this simulates multiple events being returned, for the same vgtid, from vstream + for x := 0; x < 3; x++ { + var ( + inserts []*query.QueryResult + deletes []*psdbconnect.DeletedRow + ) + if x == 2 { + inserts = []*query.QueryResult{ + sqltypes.ResultToProto3(sqltypes.MakeTestResult(testFields, + fmt.Sprintf("%v|keyboard", i+1), + fmt.Sprintf("%v|monitor", i+2), + )), + } + deletes = []*psdbconnect.DeletedRow{ + { + Result: sqltypes.ResultToProto3(sqltypes.MakeTestResult(testFields, + fmt.Sprintf("%v|deleted_monitor", i+12), + )), + }, + { + Result: sqltypes.ResultToProto3(sqltypes.MakeTestResult(testFields, + fmt.Sprintf("%v|deleted_monitor", i+12), + )), + }, + } + } + + vgtid := fmt.Sprintf("e4e20f06-e28f-11ec-8d20-8e7ac09cb64c:1-%v", i) + responses = append(responses, &psdbconnect.SyncResponse{ + Cursor: &psdbconnect.TableCursor{ + Shard: "-", + Keyspace: "connect-test", + Position: vgtid, + }, + Result: inserts, + Deletes: deletes, + }) + } + } + + syncClient := &connectSyncClientMock{ + syncResponses: responses, + } + + getCurrentVGtidClient := &connectSyncClientMock{ + syncResponses: []*psdbconnect.SyncResponse{ + responses[currentVGtidPosition], + }, + } + + cc := clientConnectionMock{ + syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { + assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) + if in.Cursor.Position == "current" { + return getCurrentVGtidClient, nil + } + + return syncClient, nil + }, + } + + ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { + return &cc, nil + } + ps := PlanetScaleSource{ + Database: "connect-test", + } + insertedRowCounter := 0 + deletedRowCounter := 0 + onRow := func(res *sqltypes.Result, op Operation) error { + if op == 
OpType_Insert { + insertedRowCounter += 1 + } + if op == OpType_Delete { + deletedRowCounter += 1 + } + return nil + } + onCursor := func(*psdbconnect.TableCursor) error { + return nil + } + sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, responses[0].Cursor, onRow, onCursor, nil) + + assert.NoError(t, err) + // sync should start at the first vgtid + esc, err := TableCursorToSerializedCursor(responses[nextVGtidPosition].Cursor) + assert.NoError(t, err) + assert.Equal(t, esc, sc) + assert.Equal(t, 2, cc.syncFnInvokedCount) + + assert.Equal(t, "[connect-test:customers shard : -] Finished reading all rows for table [customers]", dbl.messages[len(dbl.messages)-1].message) + assert.Equal(t, 2*(nextVGtidPosition/3), insertedRowCounter) + assert.Equal(t, 2*(nextVGtidPosition/3), deletedRowCounter) +} diff --git a/lib/mysql_client.go b/lib/mysql_client.go new file mode 100644 index 0000000..26615a2 --- /dev/null +++ b/lib/mysql_client.go @@ -0,0 +1,200 @@ +package lib + +import ( + "context" + "database/sql" + "fmt" + "regexp" + "strings" + "time" + + "github.com/pkg/errors" + psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" +) + +const ( + gCTableNameExpression string = `^_vt_(HOLD|PURGE|EVAC|DROP)_([0-f]{32})_([0-9]{14})$` +) + +var gcTableNameRegexp = regexp.MustCompile(gCTableNameExpression) + +type MysqlClient interface { + BuildSchema(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error + PingContext(context.Context, PlanetScaleSource) error + GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) + Close() error +} + +func NewMySQL(psc *PlanetScaleSource) (MysqlClient, error) { + db, err := sql.Open("mysql", psc.DSN(psdbconnect.TabletType_primary)) + if err != nil { + return nil, err + } + + return mysqlClient{ + db: db, + }, nil +} + +type mysqlClient struct { + db *sql.DB +} + +// BuildSchema returns schemas for all tables in a PlanetScale database +// 1. 
Get all keyspaces for the PlanetScale database
+// 2. Get the names of all tables in a keyspace, for each keyspace
+// 3. Get columns and primary keys for each table from information_schema.columns
+// 4. Report every keyspace, table and column list to schemaBuilder
+//
+// The first query failure aborts the walk and is returned wrapped.
+func (p mysqlClient) BuildSchema(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error {
+	keyspaces, err := p.GetKeyspaces(ctx, psc)
+	if err != nil {
+		return errors.Wrap(err, "Unable to build schema for database")
+	}
+
+	for _, keyspaceName := range keyspaces {
+		schemaBuilder.OnKeyspace(keyspaceName)
+		tableNames, err := p.getKeyspaceTableNames(ctx, keyspaceName)
+		if err != nil {
+			return errors.Wrap(err, "Unable to build schema for database")
+		}
+
+		for _, tableName := range tableNames {
+			schemaBuilder.OnTable(keyspaceName, tableName)
+
+			columns, err := p.getKeyspaceTableColumns(ctx, keyspaceName, tableName)
+			if err != nil {
+				return errors.Wrap(err, "Unable to build schema for database")
+			}
+
+			schemaBuilder.OnColumns(keyspaceName, tableName, columns)
+		}
+	}
+
+	return nil
+}
+
+// Close closes the underlying *sql.DB.
+func (p mysqlClient) Close() error {
+	return p.db.Close()
+}
+
+// getKeyspaceTableColumns returns name, type and primary-key flag for every
+// column information_schema.columns lists for tableName in keyspaceName.
+func (p mysqlClient) getKeyspaceTableColumns(ctx context.Context, keyspaceName string, tableName string) ([]MysqlColumn, error) {
+	var columns []MysqlColumn
+	columnNamesQR, err := p.db.QueryContext(
+		ctx,
+		"select column_name, column_type, column_key from information_schema.columns where table_name=? AND table_schema=?;",
+		tableName, keyspaceName,
+	)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Unable to get column names & types for table %v", tableName)
+	}
+	// Release the rows on every exit path, including a failed Scan.
+	defer columnNamesQR.Close()
+
+	for columnNamesQR.Next() {
+		var (
+			name       string
+			columnType string
+			columnKey  string
+		)
+		if err = columnNamesQR.Scan(&name, &columnType, &columnKey); err != nil {
+			return nil, errors.Wrapf(err, "Unable to scan row for column names & types of table %v", tableName)
+		}
+
+		columns = append(columns, MysqlColumn{
+			Name:         name,
+			Type:         columnType,
+			IsPrimaryKey: strings.EqualFold(columnKey, "PRI"),
+		})
+	}
+
+	if err := columnNamesQR.Err(); err != nil {
+		return nil, errors.Wrapf(err, "unable to iterate columns for table %s", tableName)
+	}
+
+	return columns, nil
+}
+
+// GetVitessShards returns the shards whose name matches psc.Database, with the
+// "<database>/" prefix stripped from each name.
+func (p mysqlClient) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) {
+	var shards []string
+
+	// TODO: is there a prepared statement equivalent?
+	// NOTE(review): psc.Database is spliced into the statement unescaped; it
+	// comes from the source configuration, so confirm it is validated upstream.
+	shardNamesQR, err := p.db.QueryContext(
+		ctx,
+		`show vitess_shards like "%`+psc.Database+`%";`,
+	)
+	if err != nil {
+		return shards, errors.Wrap(err, "Unable to query database for shards")
+	}
+	// Release the rows on every exit path, including a failed Scan.
+	defer shardNamesQR.Close()
+
+	for shardNamesQR.Next() {
+		var name string
+		if err = shardNamesQR.Scan(&name); err != nil {
+			return shards, errors.Wrap(err, "unable to get shard names")
+		}
+
+		shards = append(shards, strings.TrimPrefix(name, psc.Database+"/"))
+	}
+
+	if err := shardNamesQR.Err(); err != nil {
+		return shards, errors.Wrapf(err, "unable to iterate shard names for %s", psc.Database)
+	}
+	return shards, nil
+}
+
+// PingContext pings the database, giving up after five seconds.
+func (p mysqlClient) PingContext(ctx context.Context, psc PlanetScaleSource) error {
+	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+	return p.db.PingContext(ctx)
+}
+
+// getKeyspaceTableNames lists the tables in keyspaceName, leaving out any
+// table whose name matches the Vitess GC table pattern.
+func (p mysqlClient) getKeyspaceTableNames(ctx context.Context, keyspaceName string) ([]string, error) {
+	var tables []string
+
+	// A backtick inside a quoted identifier is written as two backticks.
+	quotedKeyspace := strings.ReplaceAll(keyspaceName, "`", "``")
+	// QueryContext rather than Query, so the caller's cancellation is honoured.
+	tableNamesQR, err := p.db.QueryContext(ctx, fmt.Sprintf("show tables from `%s`;", quotedKeyspace))
+	if err != nil {
+		return tables, errors.Wrap(err, "Unable to query database for schema")
+	}
+	// Release the rows on every exit path, including a failed Scan.
+	defer tableNamesQR.Close()
+
+	for tableNamesQR.Next() {
+		var name string
+		if err = tableNamesQR.Scan(&name); err != nil {
+			return tables, errors.Wrap(err, "unable to get table names")
+		}
+
+		// skip any tables that are vitess GC tables.
+		if !gcTableNameRegexp.MatchString(name) {
+			tables = append(tables, name)
+		}
+	}
+
+	if err := tableNamesQR.Err(); err != nil {
+		return tables, errors.Wrap(err, "unable to iterate table rows")
+	}
+
+	return tables, nil
+}
+
+// GetKeyspaces returns the keyspaces whose name matches psc.Database.
+func (p mysqlClient) GetKeyspaces(ctx context.Context, psc PlanetScaleSource) ([]string, error) {
+	var keyspaces []string
+
+	// TODO: is there a prepared statement equivalent?
+	// NOTE(review): psc.Database is spliced into the statement unescaped; it
+	// comes from the source configuration, so confirm it is validated upstream.
+	keyspaceNamesQR, err := p.db.QueryContext(
+		ctx,
+		`show vitess_keyspaces like "%`+psc.Database+`%";`,
+	)
+	if err != nil {
+		return keyspaces, errors.Wrap(err, "Unable to query database for keyspaces")
+	}
+	// Release the rows on every exit path, including a failed Scan.
+	defer keyspaceNamesQR.Close()
+
+	for keyspaceNamesQR.Next() {
+		var name string
+		if err = keyspaceNamesQR.Scan(&name); err != nil {
+			return keyspaces, errors.Wrap(err, "unable to get keyspace names")
+		}
+
+		keyspaces = append(keyspaces, strings.TrimPrefix(name, psc.Database+"/"))
+	}
+
+	if err := keyspaceNamesQR.Err(); err != nil {
+		return keyspaces, errors.Wrapf(err, "unable to iterate keyspace names for %s", psc.Database)
+	}
+	return keyspaces, nil
+}
diff --git a/lib/planetscale_source.go b/lib/planetscale_source.go
new file mode 100644
index 0000000..dc45420
--- /dev/null
+++ b/lib/planetscale_source.go
@@ -0,0 +1,96 @@
+package lib
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/go-sql-driver/mysql"
+	psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1"
+)
+
+// PlanetScaleSource defines a configured FiveTran Source for a PlanetScale database
+// Consider this a connection string to a PlanetScale database.
+type PlanetScaleSource struct {
+	// Host is the address DSN connects to.
+	Host string `json:"host"`
+	// Database is the database name placed in the DSN.
+	Database string `json:"database"`
+	// Username and Password are the credentials placed in the DSN.
+	Username string `json:"username"`
+	Password string `json:"password"`
+	// Shards is a comma-separated list of shard names; GetInitialState splits it on ",".
+	Shards string `json:"shards"`
+	// NOTE(review): presumably controls how tinyint columns are typed - not used in this file.
+	TreatTinyIntAsBoolean bool `json:"treat_tiny_int_as_boolean"`
+}
+
+// DSN builds the connection string the mysql driver uses to reach this source
+// over TCP. On a secure connection TLS is set to "true" and the database name
+// is suffixed with "@<tablet type>"; otherwise TLS is set to "skip-verify" and
+// the bare database name is used.
+func (psc PlanetScaleSource) DSN(tt psdbconnect.TabletType) string {
+	cfg := mysql.NewConfig()
+	cfg.Net, cfg.Addr = "tcp", psc.Host
+	cfg.User, cfg.Passwd = psc.Username, psc.Password
+
+	// Start from the insecure settings and upgrade them when TLS is in use.
+	cfg.DBName = psc.Database
+	cfg.TLSConfig = "skip-verify"
+	if useSecureConnection() {
+		cfg.TLSConfig = "true"
+		cfg.DBName = fmt.Sprintf("%v@%v", psc.Database, tabletTypeToString(tt))
+	}
+	return cfg.FormatDSN()
+}
+
+// tabletTypeToString returns "replica" for the replica tablet type and
+// "primary" for every other value.
+func tabletTypeToString(t psdbconnect.TabletType) string {
+	switch t {
+	case psdbconnect.TabletType_replica:
+		return "replica"
+	default:
+		return "primary"
+	}
+}
+
+// useSecureConnection reports whether TLS verification should be used. It is
+// false only when PS_END_TO_END_TEST_RUN is set to "yes", "y", "true" or "1".
+func useSecureConnection() bool {
+	// An unset variable reads as "", which matches none of the cases below.
+	switch os.Getenv("PS_END_TO_END_TEST_RUN") {
+	case "yes", "y", "true", "1":
+		return false
+	}
+	return true
+}
+
+// GetInitialState will return the initial/blank state for a given keyspace in all of its shards.
+// This state can be round-tripped safely with FiveTran.
+func (psc PlanetScaleSource) GetInitialState(keyspaceOrDatabase string, shards []string) (ShardStates, error) { + shardCursors := ShardStates{ + Shards: map[string]*SerializedCursor{}, + } + + if len(psc.Shards) > 0 { + configuredShards := strings.Split(psc.Shards, ",") + foundShards := map[string]bool{} + for _, existingShard := range shards { + foundShards[existingShard] = true + } + + for _, configuredShard := range configuredShards { + if len(configuredShard) > 0 { + if _, ok := foundShards[strings.TrimSpace(configuredShard)]; !ok { + return shardCursors, fmt.Errorf("shard %v does not exist on the source database", configuredShard) + } + } + } + + // if we got this far, all the shards that the customer asked for exist in the PlanetScale database. + shards = configuredShards + } + + for _, shard := range shards { + shardCursors.Shards[shard], _ = TableCursorToSerializedCursor(&psdbconnect.TableCursor{ + Shard: shard, + Keyspace: keyspaceOrDatabase, + Position: "", + }) + } + + return shardCursors, nil +} diff --git a/lib/test_types.go b/lib/test_types.go new file mode 100644 index 0000000..f712d7b --- /dev/null +++ b/lib/test_types.go @@ -0,0 +1,127 @@ +package lib + +import ( + "context" + "io" + + "github.com/pkg/errors" + + psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "google.golang.org/grpc" +) + +type dbLogMessage struct { + message string +} +type dbLogger struct { + messages []dbLogMessage +} + +func (dbl *dbLogger) Info(s string) { + dbl.messages = append(dbl.messages, dbLogMessage{ + message: s, + }) +} + +type clientConnectionMock struct { + syncFn func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) + syncFnInvoked bool + syncFnInvokedCount int +} + +type connectSyncClientMock struct { + lastResponseSent int + syncResponses []*psdbconnect.SyncResponse + grpc.ClientStream +} + +func (x *connectSyncClientMock) Recv() (*psdbconnect.SyncResponse, error) 
{ + if x.lastResponseSent >= len(x.syncResponses) { + return nil, io.EOF + } + x.lastResponseSent += 1 + return x.syncResponses[x.lastResponseSent-1], nil +} + +func (c *clientConnectionMock) Sync(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { + c.syncFnInvoked = true + c.syncFnInvokedCount += 1 + return c.syncFn(ctx, in, opts...) +} + +type ( + BuildSchemaFunc func(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error + PingContextFunc func(context.Context, PlanetScaleSource) error + GetVitessShardsFunc func(ctx context.Context, psc PlanetScaleSource) ([]string, error) + TestMysqlClient struct { + BuildSchemaFn BuildSchemaFunc + PingContextFn PingContextFunc + GetVitessShardsFn GetVitessShardsFunc + } +) + +func (t TestMysqlClient) BuildSchema(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error { + if t.BuildSchemaFn != nil { + return t.BuildSchemaFn(ctx, psc, schemaBuilder) + } + + panic("BuildSchema is not implemented") +} + +func (t TestMysqlClient) PingContext(ctx context.Context, source PlanetScaleSource) error { + if t.PingContextFn != nil { + return t.PingContextFn(ctx, source) + } + + panic("PingContext is not implemented") +} + +func (t TestMysqlClient) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) { + if t.GetVitessShardsFn != nil { + return t.GetVitessShardsFn(ctx, psc) + } + panic("GetvitessShards is not implemented") +} + +func (t TestMysqlClient) Close() error { + return nil +} + +type ( + ReadFunc func(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, tc *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) + CanConnectFunc func(ctx context.Context, ps PlanetScaleSource) error + ListShardsFunc func(ctx context.Context, ps PlanetScaleSource) ([]string, error) + + TestConnectClient struct { + 
ReadFn ReadFunc + CanConnectFn CanConnectFunc + ListShardsFn ListShardsFunc + } +) + +func (tcc *TestConnectClient) ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) { + if tcc.ListShardsFn != nil { + return tcc.ListShardsFn(ctx, ps) + } + + panic("implement me") +} + +func (tcc *TestConnectClient) CanConnect(ctx context.Context, ps PlanetScaleSource) error { + if tcc.CanConnectFn != nil { + return tcc.CanConnectFn(ctx, ps) + } + return errors.New("CanConnect is Unimplemented") +} + +func (tcc *TestConnectClient) Read(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, lastKnownPosition *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) { + if tcc.ReadFn != nil { + return tcc.ReadFn(ctx, logger, ps, tableName, columns, lastKnownPosition, onResult, onCursor, onUpdate) + } + + return nil, errors.New("Read is Unimplemented") +} + +func NewTestConnectClient(r ReadFunc) ConnectClient { + return &TestConnectClient{ReadFn: r} +} diff --git a/lib/types.go b/lib/types.go new file mode 100644 index 0000000..ccfab4d --- /dev/null +++ b/lib/types.go @@ -0,0 +1,78 @@ +package lib + +import ( + "encoding/base64" + + "vitess.io/vitess/go/sqltypes" + + "github.com/pkg/errors" + psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "github.com/planetscale/psdb/core/codec" +) + +type Operation int64 + +const ( + OpType_Insert Operation = iota + OpType_Update + OpType_Delete +) + +type UpdatedRow struct { + Before *sqltypes.Result + After *sqltypes.Result +} +type MysqlColumn struct { + Name string + Type string + IsPrimaryKey bool +} + +type SchemaBuilder interface { + OnKeyspace(keyspaceName string) + OnTable(keyspaceName, tableName string) + OnColumns(keyspaceName, tableName string, columns []MysqlColumn) +} + +func (s SerializedCursor) SerializedCursorToTableCursor() (*psdbconnect.TableCursor, error) { + var tc 
psdbconnect.TableCursor + decoded, err := base64.StdEncoding.DecodeString(s.Cursor) + if err != nil { + return nil, errors.Wrap(err, "unable to decode table cursor") + } + + err = codec.DefaultCodec.Unmarshal(decoded, &tc) + if err != nil { + return nil, errors.Wrap(err, "unable to deserialize table cursor") + } + + return &tc, nil +} + +func TableCursorToSerializedCursor(cursor *psdbconnect.TableCursor) (*SerializedCursor, error) { + d, err := codec.DefaultCodec.Marshal(cursor) + if err != nil { + return nil, errors.Wrap(err, "unable to marshal table cursor to save staate.") + } + + sc := &SerializedCursor{ + Cursor: base64.StdEncoding.EncodeToString(d), + } + return sc, nil +} + +type SerializedCursor struct { + Cursor string `json:"cursor"` +} + +type ShardStates struct { + Shards map[string]*SerializedCursor `json:"shards"` +} + +type KeyspaceState struct { + Streams map[string]ShardStates `json:"streams"` +} + +type SyncState struct { + Keyspaces map[string]KeyspaceState `json:"keyspaces"` +} From ee5faa3a678631ce41ec55ebb7b1593e283fadd4 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Thu, 4 Jan 2024 12:50:27 -0600 Subject: [PATCH 02/13] move to latest lib sources --- go.mod | 3 +++ go.sum | 5 +++++ lib/connect_client.go | 6 ++++++ lib/mysql_client.go | 1 + lib/planetscale_source.go | 1 + proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1.pb.go | 4 ++-- proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_grpc.pb.go | 2 +- .../psdbconnect/v1alpha1/psdbconnect.v1alpha1_vtproto.pb.go | 2 +- 8 files changed, 20 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 00a40a1..f1a48fd 100644 --- a/go.mod +++ b/go.mod @@ -7,12 +7,15 @@ require ( github.com/spf13/cobra v1.7.0 github.com/stretchr/testify v1.8.4 vitess.io/vitess v0.17.3 +//github.com/planetscale/vtprotobuf v0.5.0 //indirect ) require ( github.com/pkg/errors v0.9.1 github.com/planetscale/psdb v0.0.0-20220429000526-e2a0e798aaf3 + github.com/twitchtv/twirp v8.1.2+incompatible google.golang.org/grpc 
v1.59.0 + google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 google.golang.org/protobuf v1.31.0 ) diff --git a/go.sum b/go.sum index 63f2d6f..255b330 100644 --- a/go.sum +++ b/go.sum @@ -380,6 +380,8 @@ github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNG github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= +github.com/twitchtv/twirp v8.1.2+incompatible h1:0O6TfzZW09ZP5r+ORA90XQEE3PTgA6C7MBbl2KxvVgE= +github.com/twitchtv/twirp v8.1.2+incompatible/go.mod h1:RRJoFSAmTEh2weEqWtpPE3vFK5YBhA6bqp2l1kfCC5A= github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= @@ -769,6 +771,8 @@ google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA5 google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0 h1:TLkBREm4nIsEcexnCjgQd5GQWaHcqMzwQV0TX9pq8S0= +google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.2.0/go.mod h1:DNq5QpG7LJqD2AamLZ7zvKE0DEpVl2BSEVjFycAAjRY= google.golang.org/grpc/examples v0.0.0-20210430044426-28078834f35b h1:D/GTYPo6I1oEo08Bfpuj3xl5XE+UGHj7//5fVyKxhsQ= google.golang.org/grpc/examples v0.0.0-20210430044426-28078834f35b/go.mod h1:Ly7ZA/ARzg8fnPU9TyZIxoz33sEUuWX7txiqs8lPTgE= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ 
-783,6 +787,7 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= diff --git a/lib/connect_client.go b/lib/connect_client.go index a2fa127..d5ed904 100644 --- a/lib/connect_client.go +++ b/lib/connect_client.go @@ -105,6 +105,10 @@ func (p connectClient) Read(ctx context.Context, logger DatabaseLogger, ps Plane ) tabletType := psdbconnect.TabletType_primary + if ps.UseReplica { + tabletType = psdbconnect.TabletType_replica + } + currentPosition := lastKnownPosition readDuration := 1 * time.Minute preamble := fmt.Sprintf("[%v:%v shard : %v] ", ps.Database, tableName, currentPosition.Shard) @@ -195,6 +199,7 @@ func (p connectClient) sync(ctx context.Context, logger DatabaseLogger, tableNam IncludeUpdates: true, IncludeInserts: true, IncludeDeletes: true, + Cells: []string{"planetscale_operator_default"}, } c, err := client.Sync(ctx, sReq) @@ -321,6 +326,7 @@ func (p connectClient) getLatestCursorPosition(ctx context.Context, shard, keysp Position: "current", }, TabletType: tabletType, + Cells: []string{"planetscale_operator_default"}, } c, err := client.Sync(ctx, sReq) diff --git a/lib/mysql_client.go b/lib/mysql_client.go index 26615a2..054f557 100644 --- a/lib/mysql_client.go +++ b/lib/mysql_client.go @@ -163,6 +163,7 @@ func (p mysqlClient) getKeyspaceTableNames(ctx context.Context, keyspaceName str tables = 
append(tables, name) } + tables = append(tables, name) } if err := tableNamesQR.Err(); err != nil { diff --git a/lib/planetscale_source.go b/lib/planetscale_source.go index dc45420..7af7c17 100644 --- a/lib/planetscale_source.go +++ b/lib/planetscale_source.go @@ -18,6 +18,7 @@ type PlanetScaleSource struct { Password string `json:"password"` Shards string `json:"shards"` TreatTinyIntAsBoolean bool `json:"treat_tiny_int_as_boolean"` + UseReplica bool `json:"use_replica"` } // DSN returns a DataSource that mysql libraries can use to connect to a PlanetScale database. diff --git a/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1.pb.go b/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1.pb.go index e096d1c..12a8c34 100644 --- a/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1.pb.go +++ b/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.28.1 -// protoc v3.21.5 +// protoc-gen-go v1.31.0 +// protoc v3.20.1 // source: psdbconnect.v1alpha1.proto package psdbconnectv1alpha1 diff --git a/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_grpc.pb.go b/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_grpc.pb.go index 4ebaf3f..29281c3 100644 --- a/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_grpc.pb.go +++ b/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.2.0 -// - protoc v3.21.5 +// - protoc v3.20.1 // source: psdbconnect.v1alpha1.proto package psdbconnectv1alpha1 diff --git a/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_vtproto.pb.go b/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_vtproto.pb.go index 32497a5..9822827 100644 --- a/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_vtproto.pb.go +++ b/proto/psdbconnect/v1alpha1/psdbconnect.v1alpha1_vtproto.pb.go @@ -1,5 +1,5 @@ // Code generated by protoc-gen-go-vtproto. DO NOT EDIT. 
-// protoc-gen-go-vtproto version: v0.4.0 +// protoc-gen-go-vtproto version: v0.5.0 // source: psdbconnect.v1alpha1.proto package psdbconnectv1alpha1 From 909374c0703b07bfc9e2233645251b07c60298b9 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Thu, 4 Jan 2024 14:23:39 -0600 Subject: [PATCH 03/13] implement schema builder --- cmd/internal/schema_builder.go | 102 ++++++++-- go.mod | 5 +- lib/connect_client.go | 347 --------------------------------- lib/connect_client_test.go | 298 ---------------------------- lib/mysql_client.go | 201 ------------------- lib/planetscale_source.go | 97 --------- lib/test_types.go | 127 ------------ lib/types.go | 78 -------- 8 files changed, 89 insertions(+), 1166 deletions(-) delete mode 100644 lib/connect_client.go delete mode 100644 lib/connect_client_test.go delete mode 100644 lib/mysql_client.go delete mode 100644 lib/planetscale_source.go delete mode 100644 lib/test_types.go delete mode 100644 lib/types.go diff --git a/cmd/internal/schema_builder.go b/cmd/internal/schema_builder.go index feea265..a47b817 100644 --- a/cmd/internal/schema_builder.go +++ b/cmd/internal/schema_builder.go @@ -1,31 +1,99 @@ package internal -import "github.com/planetscale/airbyte-source/lib" +import ( + "github.com/planetscale/connectsdk/lib" + "regexp" + "strings" +) -type schemaBuilder struct { - treatTinyIntAsBoolean bool - catalog *Catalog +const ( + gCTableNameExpression string = `^_vt_(HOLD|PURGE|EVAC|DROP)_([0-f]{32})_([0-9]{14})$` +) + +var gcTableNameRegexp = regexp.MustCompile(gCTableNameExpression) + +type SchemaBuilder struct { + catalog *Catalog + streams map[string]map[string]Stream } -func NewSchemaBuilder(treatTinyIntAsBoolean bool) lib.SchemaBuilder { - return &schemaBuilder{ - treatTinyIntAsBoolean: treatTinyIntAsBoolean, +func (sb *SchemaBuilder) OnKeyspace(_ string) { + // no-op as Airbyte has schemas as a flat list. +} + +func (sb *SchemaBuilder) OnTable(keyspaceName, tableName string) { + // skip any that are Vitess's GC tables. 
+ if gcTableNameRegexp.MatchString(tableName) { + return + } + + schema := StreamSchema{ + Type: "object", + Properties: map[string]PropertyType{}, + } + + stream := Stream{ + Name: tableName, + Schema: schema, + SupportedSyncModes: []string{"full_refresh", "incremental"}, + Namespace: keyspaceName, + } + + if _, ok := sb.streams[keyspaceName]; !ok { + sb.streams[keyspaceName] = make(map[string]Stream) } + + sb.streams[keyspaceName][tableName] = stream } -func (sb *schemaBuilder) OnKeyspace(keyspaceName string) { - if sb.catalog == nil { - sb.catalog = &Catalog{} +func (sb *SchemaBuilder) OnColumns(keyspaceName, tableName string, columns []lib.MysqlColumn) { + if _, ok := sb.streams[keyspaceName]; !ok { + return + } + + if _, ok := sb.streams[keyspaceName][tableName]; !ok { + return + } + + table := sb.streams[keyspaceName][tableName] + table.PrimaryKeys = [][]string{} + table.DefaultCursorFields = []string{} + + for _, column := range columns { + if column.IsPrimaryKey { + table.PrimaryKeys = append(table.PrimaryKeys, []string{column.Name}) + table.DefaultCursorFields = append(table.DefaultCursorFields, column.Name) + } + + table.Schema.Properties[column.Name] = getAirbyteDataType(column.Type, true) } - panic("implement me") } -func (schemaBuilder) OnTable(keyspaceName, tableName string) { - //TODO implement me - panic("implement me") +func (sb *SchemaBuilder) GetCatalog() Catalog { + return *sb.catalog } -func (schemaBuilder) OnColumns(keyspaceName, tableName string, columns []lib.MysqlColumn) { - //TODO implement me - panic("implement me") +// Convert columnType to Airbyte type. 
+func getAirbyteDataType(mysqlType string, treatTinyIntAsBoolean bool) PropertyType { + // Support custom airbyte types documented here : + // https://docs.airbyte.com/understanding-airbyte/supported-data-types/#the-types + switch { + case strings.HasPrefix(mysqlType, "tinyint(1)"): + if treatTinyIntAsBoolean { + return PropertyType{Type: "boolean"} + } + return PropertyType{Type: "number", AirbyteType: "integer"} + case strings.HasPrefix(mysqlType, "int"), strings.HasPrefix(mysqlType, "smallint"), strings.HasPrefix(mysqlType, "mediumint"), strings.HasPrefix(mysqlType, "bigint"), strings.HasPrefix(mysqlType, "tinyint"): + return PropertyType{Type: "number", AirbyteType: "integer"} + case strings.HasPrefix(mysqlType, "decimal"), strings.HasPrefix(mysqlType, "double"), strings.HasPrefix(mysqlType, "float"): + return PropertyType{Type: "number"} + case strings.HasPrefix(mysqlType, "datetime"), strings.HasPrefix(mysqlType, "timestamp"): + return PropertyType{Type: "string", CustomFormat: "date-time", AirbyteType: "timestamp_without_timezone"} + case strings.HasPrefix(mysqlType, "date"): + return PropertyType{Type: "string", CustomFormat: "date", AirbyteType: "date"} + case strings.HasPrefix(mysqlType, "time"): + return PropertyType{Type: "string", CustomFormat: "time", AirbyteType: "time_without_timezone"} + default: + return PropertyType{Type: "string"} + } } diff --git a/go.mod b/go.mod index f1a48fd..4114cfc 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,10 @@ module github.com/planetscale/airbyte-source -go 1.21 +go 1.21.3 require ( github.com/go-sql-driver/mysql v1.7.1 + github.com/planetscale/connectsdk v0.0.0-00010101000000-000000000000 github.com/spf13/cobra v1.7.0 github.com/stretchr/testify v1.8.4 vitess.io/vitess v0.17.3 @@ -82,3 +83,5 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect inet.af/netaddr v0.0.0-20220811202034-502d2d690317 // indirect ) + +replace github.com/planetscale/connectsdk => /Users/phaniraj/ps/connectsdk diff --git a/lib/connect_client.go 
b/lib/connect_client.go deleted file mode 100644 index d5ed904..0000000 --- a/lib/connect_client.go +++ /dev/null @@ -1,347 +0,0 @@ -package lib - -import ( - "context" - "fmt" - "io" - "net/http" - "strings" - "time" - - "vitess.io/vitess/go/vt/proto/query" - - "github.com/pkg/errors" - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/planetscale/psdb/auth" - grpcclient "github.com/planetscale/psdb/core/pool" - clientoptions "github.com/planetscale/psdb/core/pool/options" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - - "vitess.io/vitess/go/sqltypes" - - _ "vitess.io/vitess/go/vt/vtctl/grpcvtctlclient" - _ "vitess.io/vitess/go/vt/vtgate/grpcvtgateconn" -) - -type ( - OnResult func(*sqltypes.Result, Operation) error - OnUpdate func(*UpdatedRow) error - OnCursor func(*psdbconnect.TableCursor) error -) - -type DatabaseLogger interface { - Info(string) -} - -// ConnectClient is a general purpose interface -// that defines all the data access methods needed for the PlanetScale Fivetran source to function. -type ConnectClient interface { - CanConnect(ctx context.Context, ps PlanetScaleSource) error - Read(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, lastKnownPosition *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) - ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) -} - -func NewConnectClient(mysqlAccess *MysqlClient) ConnectClient { - return &connectClient{ - Mysql: mysqlAccess, - } -} - -// connectClient is an implementation of the ConnectClient interface defined above. -// It uses the mysql interface provided by PlanetScale for all schema/shard/tablet discovery and -// the grpc API for incrementally syncing rows from PlanetScale. 
-type connectClient struct { - clientFn func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) - Mysql *MysqlClient -} - -func (p connectClient) ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) { - return (*p.Mysql).GetVitessShards(ctx, ps) -} - -func (p connectClient) CanConnect(ctx context.Context, ps PlanetScaleSource) error { - if *p.Mysql == nil { - return status.Error(codes.Internal, "Mysql access is uninitialized") - } - - if err := p.checkEdgePassword(ctx, ps); err != nil { - return errors.Wrap(err, "Unable to initialize Connect Session") - } - - return (*p.Mysql).PingContext(ctx, ps) -} - -func (p connectClient) checkEdgePassword(ctx context.Context, psc PlanetScaleSource) error { - if !strings.HasSuffix(psc.Host, ".connect.psdb.cloud") { - return errors.New("This password is not connect-enabled, please ensure that your organization is enrolled in the Connect beta.") - } - reqCtx, cancel := context.WithTimeout(ctx, 2*time.Second) - defer cancel() - req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, fmt.Sprintf("https://%v", psc.Host), nil) - if err != nil { - return err - } - - _, err = http.DefaultClient.Do(req) - if err != nil { - return errors.Errorf("The database %q, hosted at %q, is inaccessible from this process", psc.Database, psc.Host) - } - - return nil -} - -// Read streams rows from a table given a starting cursor. -// 1. We will get the latest vgtid for a given table in a shard when a sync session starts. -// 2. This latest vgtid is now the stopping point for this sync session. -// 3. Ask vstream to stream from the last known vgtid -// 4. When we reach the stopping point, read all rows available at this vgtid -// 5. End the stream when (a) a vgtid newer than latest vgtid is encountered or (b) the timeout kicks in. 
-func (p connectClient) Read(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, lastKnownPosition *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) { - var ( - err error - sErr error - currentSerializedCursor *SerializedCursor - ) - - tabletType := psdbconnect.TabletType_primary - if ps.UseReplica { - tabletType = psdbconnect.TabletType_replica - } - - currentPosition := lastKnownPosition - readDuration := 1 * time.Minute - preamble := fmt.Sprintf("[%v:%v shard : %v] ", ps.Database, tableName, currentPosition.Shard) - for { - logger.Info(preamble + "peeking to see if there's any new rows") - latestCursorPosition, lcErr := p.getLatestCursorPosition(ctx, currentPosition.Shard, currentPosition.Keyspace, tableName, ps, tabletType) - if lcErr != nil { - return currentSerializedCursor, errors.Wrap(err, "Unable to get latest cursor position") - } - - // the current vgtid is the same as the last synced vgtid, no new rows. - if latestCursorPosition == currentPosition.Position { - logger.Info(preamble + "no new rows found, exiting") - return TableCursorToSerializedCursor(currentPosition) - } - logger.Info(fmt.Sprintf("new rows found, syncing rows for %v", readDuration)) - logger.Info(fmt.Sprintf(preamble+"syncing rows with cursor [%v]", currentPosition)) - - currentPosition, err = p.sync(ctx, logger, tableName, columns, currentPosition, latestCursorPosition, ps, tabletType, readDuration, onResult, onCursor, onUpdate) - if currentPosition.Position != "" { - currentSerializedCursor, sErr = TableCursorToSerializedCursor(currentPosition) - if sErr != nil { - // if we failed to serialize here, we should bail. 
- return currentSerializedCursor, errors.Wrap(sErr, "unable to serialize current position") - } - } - if err != nil { - if s, ok := status.FromError(err); ok { - // if the error is anything other than server timeout, keep going - if s.Code() != codes.DeadlineExceeded { - logger.Info(fmt.Sprintf("%v Got error [%v] with message [%q], Returning with cursor :[%v] after server timeout", preamble, s.Code(), err, currentPosition)) - return currentSerializedCursor, nil - } else { - logger.Info(preamble + "Continuing with cursor after server timeout") - } - } else if errors.Is(err, io.EOF) { - logger.Info(fmt.Sprintf("%vFinished reading all rows for table [%v]", preamble, tableName)) - return currentSerializedCursor, nil - } else { - logger.Info(fmt.Sprintf("non-grpc error [%v]]", err)) - return currentSerializedCursor, err - } - } - } -} - -func (p connectClient) sync(ctx context.Context, logger DatabaseLogger, tableName string, columns []string, tc *psdbconnect.TableCursor, stopPosition string, ps PlanetScaleSource, tabletType psdbconnect.TabletType, readDuration time.Duration, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*psdbconnect.TableCursor, error) { - ctx, cancel := context.WithTimeout(ctx, readDuration) - defer cancel() - - var ( - err error - client psdbconnect.ConnectClient - ) - - if p.clientFn == nil { - conn, err := grpcclient.Dial(ctx, ps.Host, - clientoptions.WithDefaultTLSConfig(), - clientoptions.WithCompression(true), - clientoptions.WithConnectionPool(1), - clientoptions.WithExtraCallOption( - auth.NewBasicAuth(ps.Username, ps.Password).CallOption(), - ), - ) - if err != nil { - return tc, err - } - defer conn.Close() - client = psdbconnect.NewConnectClient(conn) - } else { - client, err = p.clientFn(ctx, ps) - if err != nil { - return tc, err - } - } - - if tc.LastKnownPk != nil { - tc.Position = "" - } - - logger.Info(fmt.Sprintf("Syncing with cursor position : [%v], using last known PK : %v, stop cursor is : [%v]", tc.Position, 
tc.LastKnownPk != nil, stopPosition)) - - sReq := &psdbconnect.SyncRequest{ - TableName: tableName, - Cursor: tc, - TabletType: tabletType, - Columns: columns, - IncludeUpdates: true, - IncludeInserts: true, - IncludeDeletes: true, - Cells: []string{"planetscale_operator_default"}, - } - - c, err := client.Sync(ctx, sReq) - if err != nil { - return tc, err - } - - // stop when we've reached the well known stop position for this sync session. - watchForVgGtidChange := false - for { - - res, err := c.Recv() - if err != nil { - return tc, err - } - - if res.Cursor != nil { - tc = res.Cursor - } - - // Because of the ordering of events in a vstream - // we receive the vgtid event first and then the rows. - // the vgtid event might repeat, but they're ordered. - // so we once we reach the desired stop vgtid, we stop the sync session - // if we get a newer vgtid. - watchForVgGtidChange = watchForVgGtidChange || tc.Position == stopPosition - - if onResult != nil { - for _, insertedRow := range res.Result { - qr := sqltypes.Proto3ToResult(insertedRow) - for _, row := range qr.Rows { - sqlResult := &sqltypes.Result{ - Fields: insertedRow.Fields, - } - sqlResult.Rows = append(sqlResult.Rows, row) - if err := onResult(sqlResult, OpType_Insert); err != nil { - return tc, status.Error(codes.Internal, "unable to serialize row") - } - } - } - - for _, deletedRow := range res.Deletes { - qr := sqltypes.Proto3ToResult(deletedRow.Result) - for _, row := range qr.Rows { - sqlResult := &sqltypes.Result{ - Fields: deletedRow.Result.Fields, - } - sqlResult.Rows = append(sqlResult.Rows, row) - if err := onResult(sqlResult, OpType_Delete); err != nil { - return nil, status.Error(codes.Internal, "unable to serialize row") - } - } - } - } - - if onUpdate != nil { - for _, update := range res.Updates { - updatedRow := &UpdatedRow{ - Before: serializeQueryResult(update.Before), - After: serializeQueryResult(update.After), - } - if err := onUpdate(updatedRow); err != nil { - return nil, 
status.Error(codes.Internal, "unable to serialize update") - } - } - } - - if watchForVgGtidChange && tc.Position != stopPosition { - if err := onCursor(tc); err != nil { - return tc, status.Error(codes.Internal, "unable to serialize cursor") - } - return tc, io.EOF - } - } -} - -func serializeQueryResult(result *query.QueryResult) *sqltypes.Result { - qr := sqltypes.Proto3ToResult(result) - var sqlResult *sqltypes.Result - for _, row := range qr.Rows { - sqlResult = &sqltypes.Result{ - Fields: result.Fields, - } - sqlResult.Rows = append(sqlResult.Rows, row) - } - return sqlResult -} - -func (p connectClient) getLatestCursorPosition(ctx context.Context, shard, keyspace string, tableName string, ps PlanetScaleSource, tabletType psdbconnect.TabletType) (string, error) { - timeout := 45 * time.Second - ctx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - var ( - err error - client psdbconnect.ConnectClient - ) - - if p.clientFn == nil { - conn, err := grpcclient.Dial(ctx, ps.Host, - clientoptions.WithDefaultTLSConfig(), - clientoptions.WithCompression(true), - clientoptions.WithConnectionPool(1), - clientoptions.WithExtraCallOption( - auth.NewBasicAuth(ps.Username, ps.Password).CallOption(), - ), - ) - if err != nil { - return "", err - } - defer conn.Close() - client = psdbconnect.NewConnectClient(conn) - } else { - client, err = p.clientFn(ctx, ps) - if err != nil { - return "", err - } - } - - sReq := &psdbconnect.SyncRequest{ - TableName: tableName, - Cursor: &psdbconnect.TableCursor{ - Shard: shard, - Keyspace: keyspace, - Position: "current", - }, - TabletType: tabletType, - Cells: []string{"planetscale_operator_default"}, - } - - c, err := client.Sync(ctx, sReq) - if err != nil { - return "", nil - } - - for { - res, err := c.Recv() - if err != nil { - return "", err - } - - if res.Cursor != nil { - return res.Cursor.Position, nil - } - } -} diff --git a/lib/connect_client_test.go b/lib/connect_client_test.go deleted file mode 100644 index 
090d756..0000000 --- a/lib/connect_client_test.go +++ /dev/null @@ -1,298 +0,0 @@ -package lib - -import ( - "context" - "fmt" - "testing" - - "vitess.io/vitess/go/vt/proto/query" - - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - - "github.com/stretchr/testify/assert" - "google.golang.org/grpc" - - "vitess.io/vitess/go/sqltypes" -) - -func TestRead_CanPeekBeforeRead(t *testing.T) { - dbl := &dbLogger{} - ped := connectClient{} - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - { - Cursor: tc, - }, - { - Cursor: tc, - }, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, "current", in.Cursor.Position) - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{} - onRow := func(*sqltypes.Result, Operation) error { - return nil - } - onCursor := func(*psdbconnect.TableCursor) error { - return nil - } - sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, cc.syncFnInvokedCount) -} - -func TestRead_CanEarlyExitIfNoNewVGtidInPeek(t *testing.T) { - dbl := &dbLogger{} - ped := connectClient{} - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: tc}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) 
(psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, "current", in.Cursor.Position) - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{} - onRow := func(*sqltypes.Result, Operation) error { - return nil - } - onCursor := func(*psdbconnect.TableCursor) error { - return nil - } - sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc, "should return original cursor if no new rows found") - assert.Equal(t, 1, cc.syncFnInvokedCount) - assert.Contains(t, dbl.messages[len(dbl.messages)-1].message, "no new rows found, exiting") -} - -func TestRead_CanPickPrimaryForShardedKeyspaces(t *testing.T) { - dbl := &dbLogger{} - ped := connectClient{} - tc := &psdbconnect.TableCursor{ - Shard: "40-80", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: tc}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - onRow := func(*sqltypes.Result, Operation) error { - return nil - } - onCursor := func(*psdbconnect.TableCursor) error { - return nil - } - sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - 
assert.Equal(t, 1, cc.syncFnInvokedCount) -} - -func TestRead_CanReturnNewCursorIfNewFound(t *testing.T) { - dbl := &dbLogger{} - ped := connectClient{} - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - newTC := &psdbconnect.TableCursor{ - Shard: "-", - Position: "I_AM_FARTHER_IN_THE_BINLOG", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: newTC}, - {Cursor: newTC}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - onRow := func(*sqltypes.Result, Operation) error { - return nil - } - onCursor := func(*psdbconnect.TableCursor) error { - return nil - } - sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, tc, onRow, onCursor, nil) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(newTC) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 2, cc.syncFnInvokedCount) -} - -func TestRead_CanStopAtWellKnownCursor(t *testing.T) { - dbl := &dbLogger{} - ped := connectClient{} - - testFields := sqltypes.MakeTestFields( - "pid|description", - "int64|varbinary") - numResponses := 10 - // when the client tries to get the "current" vgtid, - // we return the ante-penultimate element of the array. - currentVGtidPosition := (numResponses * 3) - 4 - // this is the next vgtid that should stop the sync session. 
- nextVGtidPosition := currentVGtidPosition + 1 - responses := make([]*psdbconnect.SyncResponse, 0, numResponses) - for i := 0; i < numResponses; i++ { - // this simulates multiple events being returned, for the same vgtid, from vstream - for x := 0; x < 3; x++ { - var ( - inserts []*query.QueryResult - deletes []*psdbconnect.DeletedRow - ) - if x == 2 { - inserts = []*query.QueryResult{ - sqltypes.ResultToProto3(sqltypes.MakeTestResult(testFields, - fmt.Sprintf("%v|keyboard", i+1), - fmt.Sprintf("%v|monitor", i+2), - )), - } - deletes = []*psdbconnect.DeletedRow{ - { - Result: sqltypes.ResultToProto3(sqltypes.MakeTestResult(testFields, - fmt.Sprintf("%v|deleted_monitor", i+12), - )), - }, - { - Result: sqltypes.ResultToProto3(sqltypes.MakeTestResult(testFields, - fmt.Sprintf("%v|deleted_monitor", i+12), - )), - }, - } - } - - vgtid := fmt.Sprintf("e4e20f06-e28f-11ec-8d20-8e7ac09cb64c:1-%v", i) - responses = append(responses, &psdbconnect.SyncResponse{ - Cursor: &psdbconnect.TableCursor{ - Shard: "-", - Keyspace: "connect-test", - Position: vgtid, - }, - Result: inserts, - Deletes: deletes, - }) - } - } - - syncClient := &connectSyncClientMock{ - syncResponses: responses, - } - - getCurrentVGtidClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - responses[currentVGtidPosition], - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - if in.Cursor.Position == "current" { - return getCurrentVGtidClient, nil - } - - return syncClient, nil - }, - } - - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - insertedRowCounter := 0 - deletedRowCounter := 0 - onRow := func(res *sqltypes.Result, op Operation) error { - if op == 
OpType_Insert { - insertedRowCounter += 1 - } - if op == OpType_Delete { - deletedRowCounter += 1 - } - return nil - } - onCursor := func(*psdbconnect.TableCursor) error { - return nil - } - sc, err := ped.Read(context.Background(), dbl, ps, "customers", nil, responses[0].Cursor, onRow, onCursor, nil) - - assert.NoError(t, err) - // sync should start at the first vgtid - esc, err := TableCursorToSerializedCursor(responses[nextVGtidPosition].Cursor) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 2, cc.syncFnInvokedCount) - - assert.Equal(t, "[connect-test:customers shard : -] Finished reading all rows for table [customers]", dbl.messages[len(dbl.messages)-1].message) - assert.Equal(t, 2*(nextVGtidPosition/3), insertedRowCounter) - assert.Equal(t, 2*(nextVGtidPosition/3), deletedRowCounter) -} diff --git a/lib/mysql_client.go b/lib/mysql_client.go deleted file mode 100644 index 054f557..0000000 --- a/lib/mysql_client.go +++ /dev/null @@ -1,201 +0,0 @@ -package lib - -import ( - "context" - "database/sql" - "fmt" - "regexp" - "strings" - "time" - - "github.com/pkg/errors" - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" -) - -const ( - gCTableNameExpression string = `^_vt_(HOLD|PURGE|EVAC|DROP)_([0-f]{32})_([0-9]{14})$` -) - -var gcTableNameRegexp = regexp.MustCompile(gCTableNameExpression) - -type MysqlClient interface { - BuildSchema(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error - PingContext(context.Context, PlanetScaleSource) error - GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) - Close() error -} - -func NewMySQL(psc *PlanetScaleSource) (MysqlClient, error) { - db, err := sql.Open("mysql", psc.DSN(psdbconnect.TabletType_primary)) - if err != nil { - return nil, err - } - - return mysqlClient{ - db: db, - }, nil -} - -type mysqlClient struct { - db *sql.DB -} - -// BuildSchema returns schemas for all tables in a PlanetScale database -// 1. 
Get all keyspaces for the PlanetScale database -// 2. Get the schemas for all tables in a keyspace, for each keyspace -// 2. Get columns and primary keys for each table from information_schema.columns -// 3. Format results into FiveTran response -func (p mysqlClient) BuildSchema(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error { - keyspaces, err := p.GetKeyspaces(ctx, psc) - if err != nil { - return errors.Wrap(err, "Unable to build schema for database") - } - - for _, keyspaceName := range keyspaces { - schemaBuilder.OnKeyspace(keyspaceName) - tableNames, err := p.getKeyspaceTableNames(ctx, keyspaceName) - if err != nil { - return errors.Wrap(err, "Unable to build schema for database") - } - - for _, tableName := range tableNames { - schemaBuilder.OnTable(keyspaceName, tableName) - - columns, err := p.getKeyspaceTableColumns(ctx, keyspaceName, tableName) - if err != nil { - return errors.Wrap(err, "Unable to build schema for database") - } - - schemaBuilder.OnColumns(keyspaceName, tableName, columns) - } - } - - return nil -} - -func (p mysqlClient) Close() error { - return p.db.Close() -} - -func (p mysqlClient) getKeyspaceTableColumns(ctx context.Context, keyspaceName string, tableName string) ([]MysqlColumn, error) { - var columns []MysqlColumn - columnNamesQR, err := p.db.QueryContext( - ctx, - "select column_name, column_type, column_key from information_schema.columns where table_name=? 
AND table_schema=?;", - tableName, keyspaceName, - ) - if err != nil { - return nil, errors.Wrapf(err, "Unable to get column names & types for table %v", tableName) - } - for columnNamesQR.Next() { - var ( - name string - columnType string - columnKey string - ) - if err = columnNamesQR.Scan(&name, &columnType, &columnKey); err != nil { - return nil, errors.Wrapf(err, "Unable to scan row for column names & types of table %v", tableName) - } - - columns = append(columns, MysqlColumn{ - Name: name, - Type: columnType, - IsPrimaryKey: strings.EqualFold(columnKey, "PRI"), - }) - } - - if err := columnNamesQR.Err(); err != nil { - return nil, errors.Wrapf(err, "unable to iterate columns for table %s", tableName) - } - - return columns, nil -} - -func (p mysqlClient) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) { - var shards []string - - // TODO: is there a prepared statement equivalent? - shardNamesQR, err := p.db.QueryContext( - ctx, - `show vitess_shards like "%`+psc.Database+`%";`, - ) - if err != nil { - return shards, errors.Wrap(err, "Unable to query database for shards") - } - - for shardNamesQR.Next() { - var name string - if err = shardNamesQR.Scan(&name); err != nil { - return shards, errors.Wrap(err, "unable to get shard names") - } - - shards = append(shards, strings.TrimPrefix(name, psc.Database+"/")) - } - - if err := shardNamesQR.Err(); err != nil { - return shards, errors.Wrapf(err, "unable to iterate shard names for %s", psc.Database) - } - return shards, nil -} - -func (p mysqlClient) PingContext(ctx context.Context, psc PlanetScaleSource) error { - ctx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - return p.db.PingContext(ctx) -} - -func (p mysqlClient) getKeyspaceTableNames(ctx context.Context, keyspaceName string) ([]string, error) { - var tables []string - - tableNamesQR, err := p.db.Query(fmt.Sprintf("show tables from `%s`;", keyspaceName)) - if err != nil { - return tables, errors.Wrap(err, 
"Unable to query database for schema") - } - - for tableNamesQR.Next() { - var name string - if err = tableNamesQR.Scan(&name); err != nil { - return tables, errors.Wrap(err, "unable to get table names") - } - - // skip any tables that are vitess GC tables. - if !gcTableNameRegexp.MatchString(name) { - tables = append(tables, name) - } - - tables = append(tables, name) - } - - if err := tableNamesQR.Err(); err != nil { - return tables, errors.Wrap(err, "unable to iterate table rows") - } - - return tables, err -} - -func (p mysqlClient) GetKeyspaces(ctx context.Context, psc PlanetScaleSource) ([]string, error) { - var keyspaces []string - - // TODO: is there a prepared statement equivalent? - shardNamesQR, err := p.db.QueryContext( - ctx, - `show vitess_keyspaces like "%`+psc.Database+`%";`, - ) - if err != nil { - return keyspaces, errors.Wrap(err, "Unable to query database for keyspaces") - } - - for shardNamesQR.Next() { - var name string - if err = shardNamesQR.Scan(&name); err != nil { - return keyspaces, errors.Wrap(err, "unable to get shard names") - } - - keyspaces = append(keyspaces, strings.TrimPrefix(name, psc.Database+"/")) - } - - if err := shardNamesQR.Err(); err != nil { - return keyspaces, errors.Wrapf(err, "unable to iterate shard names for %s", psc.Database) - } - return keyspaces, nil -} diff --git a/lib/planetscale_source.go b/lib/planetscale_source.go deleted file mode 100644 index 7af7c17..0000000 --- a/lib/planetscale_source.go +++ /dev/null @@ -1,97 +0,0 @@ -package lib - -import ( - "fmt" - "os" - "strings" - - "github.com/go-sql-driver/mysql" - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" -) - -// PlanetScaleSource defines a configured FiveTran Source for a PlanetScale database -// Consider this a connection string to a PlanetScale database. 
-type PlanetScaleSource struct { - Host string `json:"host"` - Database string `json:"database"` - Username string `json:"username"` - Password string `json:"password"` - Shards string `json:"shards"` - TreatTinyIntAsBoolean bool `json:"treat_tiny_int_as_boolean"` - UseReplica bool `json:"use_replica"` -} - -// DSN returns a DataSource that mysql libraries can use to connect to a PlanetScale database. -func (psc PlanetScaleSource) DSN(tt psdbconnect.TabletType) string { - config := mysql.NewConfig() - config.Net = "tcp" - config.Addr = psc.Host - config.User = psc.Username - config.DBName = psc.Database - config.Passwd = psc.Password - - if useSecureConnection() { - config.TLSConfig = "true" - config.DBName = fmt.Sprintf("%v@%v", psc.Database, tabletTypeToString(tt)) - } else { - config.TLSConfig = "skip-verify" - } - return config.FormatDSN() -} - -func tabletTypeToString(t psdbconnect.TabletType) string { - if t == psdbconnect.TabletType_replica { - return "replica" - } - - return "primary" -} - -func useSecureConnection() bool { - e2eTestRun, found := os.LookupEnv("PS_END_TO_END_TEST_RUN") - if found && (e2eTestRun == "yes" || - e2eTestRun == "y" || - e2eTestRun == "true" || - e2eTestRun == "1") { - return false - } - - return true -} - -// GetInitialState will return the initial/blank state for a given keyspace in all of its shards. -// This state can be round-tripped safely with FiveTran. 
-func (psc PlanetScaleSource) GetInitialState(keyspaceOrDatabase string, shards []string) (ShardStates, error) { - shardCursors := ShardStates{ - Shards: map[string]*SerializedCursor{}, - } - - if len(psc.Shards) > 0 { - configuredShards := strings.Split(psc.Shards, ",") - foundShards := map[string]bool{} - for _, existingShard := range shards { - foundShards[existingShard] = true - } - - for _, configuredShard := range configuredShards { - if len(configuredShard) > 0 { - if _, ok := foundShards[strings.TrimSpace(configuredShard)]; !ok { - return shardCursors, fmt.Errorf("shard %v does not exist on the source database", configuredShard) - } - } - } - - // if we got this far, all the shards that the customer asked for exist in the PlanetScale database. - shards = configuredShards - } - - for _, shard := range shards { - shardCursors.Shards[shard], _ = TableCursorToSerializedCursor(&psdbconnect.TableCursor{ - Shard: shard, - Keyspace: keyspaceOrDatabase, - Position: "", - }) - } - - return shardCursors, nil -} diff --git a/lib/test_types.go b/lib/test_types.go deleted file mode 100644 index f712d7b..0000000 --- a/lib/test_types.go +++ /dev/null @@ -1,127 +0,0 @@ -package lib - -import ( - "context" - "io" - - "github.com/pkg/errors" - - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "google.golang.org/grpc" -) - -type dbLogMessage struct { - message string -} -type dbLogger struct { - messages []dbLogMessage -} - -func (dbl *dbLogger) Info(s string) { - dbl.messages = append(dbl.messages, dbLogMessage{ - message: s, - }) -} - -type clientConnectionMock struct { - syncFn func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) - syncFnInvoked bool - syncFnInvokedCount int -} - -type connectSyncClientMock struct { - lastResponseSent int - syncResponses []*psdbconnect.SyncResponse - grpc.ClientStream -} - -func (x *connectSyncClientMock) Recv() (*psdbconnect.SyncResponse, 
error) { - if x.lastResponseSent >= len(x.syncResponses) { - return nil, io.EOF - } - x.lastResponseSent += 1 - return x.syncResponses[x.lastResponseSent-1], nil -} - -func (c *clientConnectionMock) Sync(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - c.syncFnInvoked = true - c.syncFnInvokedCount += 1 - return c.syncFn(ctx, in, opts...) -} - -type ( - BuildSchemaFunc func(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error - PingContextFunc func(context.Context, PlanetScaleSource) error - GetVitessShardsFunc func(ctx context.Context, psc PlanetScaleSource) ([]string, error) - TestMysqlClient struct { - BuildSchemaFn BuildSchemaFunc - PingContextFn PingContextFunc - GetVitessShardsFn GetVitessShardsFunc - } -) - -func (t TestMysqlClient) BuildSchema(ctx context.Context, psc PlanetScaleSource, schemaBuilder SchemaBuilder) error { - if t.BuildSchemaFn != nil { - return t.BuildSchemaFn(ctx, psc, schemaBuilder) - } - - panic("BuildSchema is not implemented") -} - -func (t TestMysqlClient) PingContext(ctx context.Context, source PlanetScaleSource) error { - if t.PingContextFn != nil { - return t.PingContextFn(ctx, source) - } - - panic("PingContext is not implemented") -} - -func (t TestMysqlClient) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) { - if t.GetVitessShardsFn != nil { - return t.GetVitessShardsFn(ctx, psc) - } - panic("GetvitessShards is not implemented") -} - -func (t TestMysqlClient) Close() error { - return nil -} - -type ( - ReadFunc func(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, tc *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) - CanConnectFunc func(ctx context.Context, ps PlanetScaleSource) error - ListShardsFunc func(ctx context.Context, ps PlanetScaleSource) ([]string, error) - - TestConnectClient 
struct { - ReadFn ReadFunc - CanConnectFn CanConnectFunc - ListShardsFn ListShardsFunc - } -) - -func (tcc *TestConnectClient) ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) { - if tcc.ListShardsFn != nil { - return tcc.ListShardsFn(ctx, ps) - } - - panic("implement me") -} - -func (tcc *TestConnectClient) CanConnect(ctx context.Context, ps PlanetScaleSource) error { - if tcc.CanConnectFn != nil { - return tcc.CanConnectFn(ctx, ps) - } - return errors.New("CanConnect is Unimplemented") -} - -func (tcc *TestConnectClient) Read(ctx context.Context, logger DatabaseLogger, ps PlanetScaleSource, tableName string, columns []string, lastKnownPosition *psdbconnect.TableCursor, onResult OnResult, onCursor OnCursor, onUpdate OnUpdate) (*SerializedCursor, error) { - if tcc.ReadFn != nil { - return tcc.ReadFn(ctx, logger, ps, tableName, columns, lastKnownPosition, onResult, onCursor, onUpdate) - } - - return nil, errors.New("Read is Unimplemented") -} - -func NewTestConnectClient(r ReadFunc) ConnectClient { - return &TestConnectClient{ReadFn: r} -} diff --git a/lib/types.go b/lib/types.go deleted file mode 100644 index ccfab4d..0000000 --- a/lib/types.go +++ /dev/null @@ -1,78 +0,0 @@ -package lib - -import ( - "encoding/base64" - - "vitess.io/vitess/go/sqltypes" - - "github.com/pkg/errors" - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/planetscale/psdb/core/codec" -) - -type Operation int64 - -const ( - OpType_Insert Operation = iota - OpType_Update - OpType_Delete -) - -type UpdatedRow struct { - Before *sqltypes.Result - After *sqltypes.Result -} -type MysqlColumn struct { - Name string - Type string - IsPrimaryKey bool -} - -type SchemaBuilder interface { - OnKeyspace(keyspaceName string) - OnTable(keyspaceName, tableName string) - OnColumns(keyspaceName, tableName string, columns []MysqlColumn) -} - -func (s SerializedCursor) SerializedCursorToTableCursor() (*psdbconnect.TableCursor, error) { - var 
tc psdbconnect.TableCursor - decoded, err := base64.StdEncoding.DecodeString(s.Cursor) - if err != nil { - return nil, errors.Wrap(err, "unable to decode table cursor") - } - - err = codec.DefaultCodec.Unmarshal(decoded, &tc) - if err != nil { - return nil, errors.Wrap(err, "unable to deserialize table cursor") - } - - return &tc, nil -} - -func TableCursorToSerializedCursor(cursor *psdbconnect.TableCursor) (*SerializedCursor, error) { - d, err := codec.DefaultCodec.Marshal(cursor) - if err != nil { - return nil, errors.Wrap(err, "unable to marshal table cursor to save staate.") - } - - sc := &SerializedCursor{ - Cursor: base64.StdEncoding.EncodeToString(d), - } - return sc, nil -} - -type SerializedCursor struct { - Cursor string `json:"cursor"` -} - -type ShardStates struct { - Shards map[string]*SerializedCursor `json:"shards"` -} - -type KeyspaceState struct { - Streams map[string]ShardStates `json:"streams"` -} - -type SyncState struct { - Keyspaces map[string]KeyspaceState `json:"keyspaces"` -} From ab23a828f726b62b8c8644efcc0eb55044cdd886 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Thu, 4 Jan 2024 14:48:52 -0600 Subject: [PATCH 04/13] Implement Discover and Check commands using SDK --- cmd/airbyte-source/check.go | 24 +++++++-- cmd/airbyte-source/discover.go | 20 +++++++- cmd/airbyte-source/test_types.go | 4 -- cmd/internal/planetscale_edge_database.go | 59 ----------------------- cmd/internal/schema_builder.go | 22 +++++++-- 5 files changed, 56 insertions(+), 73 deletions(-) diff --git a/cmd/airbyte-source/check.go b/cmd/airbyte-source/check.go index 91ba09d..6f34724 100644 --- a/cmd/airbyte-source/check.go +++ b/cmd/airbyte-source/check.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "github.com/planetscale/connectsdk/lib" "os" "github.com/planetscale/airbyte-source/cmd/internal" @@ -49,7 +50,7 @@ func CheckCommand(ch *Helper) *cobra.Command { } }() - cs, _ := checkConnectionStatus(ch.Database, psc) + cs, _ := checkConnectionStatus(psc) 
ch.Logger.ConnectionStatus(cs) }, } @@ -70,9 +71,24 @@ func parseSource(reader FileReader, configFilePath string) (internal.PlanetScale return psc, nil } -func checkConnectionStatus(database internal.PlanetScaleDatabase, psc internal.PlanetScaleSource) (internal.ConnectionStatus, error) { - - if err := database.CanConnect(context.Background(), psc); err != nil { +func checkConnectionStatus(psc internal.PlanetScaleSource) (internal.ConnectionStatus, error) { + libpsc := lib.PlanetScaleSource{ + UseReplica: true, + Username: psc.Username, + Database: psc.Database, + Host: psc.Host, + Password: psc.Password, + TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, + } + mc, err := lib.NewMySQL(&libpsc) + if err != nil { + return internal.ConnectionStatus{ + Status: "FAILED", + Message: fmt.Sprintf("Unable to connect to PlanetScale database %v at host %v with username %v. Failed with \n %v", psc.Database, psc.Host, psc.Username, err), + }, err + } + cc := lib.NewConnectClient(&mc) + if err := cc.CanConnect(context.Background(), libpsc); err != nil { return internal.ConnectionStatus{ Status: "FAILED", Message: fmt.Sprintf("Unable to connect to PlanetScale database %v at host %v with username %v. 
Failed with \n %v", psc.Database, psc.Host, psc.Username, err), diff --git a/cmd/airbyte-source/discover.go b/cmd/airbyte-source/discover.go index 7d78a65..ccde24e 100644 --- a/cmd/airbyte-source/discover.go +++ b/cmd/airbyte-source/discover.go @@ -6,6 +6,7 @@ import ( "os" "github.com/planetscale/airbyte-source/cmd/internal" + "github.com/planetscale/connectsdk/lib" "github.com/spf13/cobra" ) @@ -40,7 +41,7 @@ func DiscoverCommand(ch *Helper) *cobra.Command { return } - cs, err := checkConnectionStatus(ch.Database, psc) + cs, err := checkConnectionStatus(psc) if err != nil { ch.Logger.ConnectionStatus(cs) return @@ -52,12 +53,27 @@ func DiscoverCommand(ch *Helper) *cobra.Command { } }() - c, err := ch.Database.DiscoverSchema(context.Background(), psc) + libpsc := lib.PlanetScaleSource{ + UseReplica: true, + Username: psc.Username, + Database: psc.Database, + Host: psc.Host, + Password: psc.Password, + TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, + } + mc, err := lib.NewMySQL(&libpsc) if err != nil { ch.Logger.Log(internal.LOGLEVEL_ERROR, fmt.Sprintf("Unable to discover database, failed with [%v]", err)) return } + sb := internal.NewSchemaBuilder() + if err := mc.BuildSchema(context.Background(), libpsc, sb); err != nil { + ch.Logger.Log(internal.LOGLEVEL_ERROR, fmt.Sprintf("Unable to discover database, failed with [%v]", err)) + return + } + + c := sb.(*internal.SchemaBuilder).GetCatalog() ch.Logger.Catalog(c) }, } diff --git a/cmd/airbyte-source/test_types.go b/cmd/airbyte-source/test_types.go index da8230f..de7a3c6 100644 --- a/cmd/airbyte-source/test_types.go +++ b/cmd/airbyte-source/test_types.go @@ -39,10 +39,6 @@ func (td testDatabase) HasTabletType(ctx context.Context, psc internal.PlanetSca return true, nil } -func (td testDatabase) DiscoverSchema(ctx context.Context, ps internal.PlanetScaleSource) (internal.Catalog, error) { - return td.discoverSchemaResponse.catalog, td.discoverSchemaResponse.err -} - func (td testDatabase) Read(ctx 
context.Context, w io.Writer, ps internal.PlanetScaleSource, s internal.ConfiguredStream, tc *psdbconnect.TableCursor) (*internal.SerializedCursor, error) { // TODO implement me panic("implement me") diff --git a/cmd/internal/planetscale_edge_database.go b/cmd/internal/planetscale_edge_database.go index 3f10b26..7a870df 100644 --- a/cmd/internal/planetscale_edge_database.go +++ b/cmd/internal/planetscale_edge_database.go @@ -24,7 +24,6 @@ import ( // that defines all the data access methods needed for the PlanetScale Airbyte source to function. type PlanetScaleDatabase interface { CanConnect(ctx context.Context, ps PlanetScaleSource) error - DiscoverSchema(ctx context.Context, ps PlanetScaleSource) (Catalog, error) ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) Read(ctx context.Context, w io.Writer, ps PlanetScaleSource, s ConfiguredStream, tc *psdbconnect.TableCursor) (*SerializedCursor, error) Close() error @@ -66,64 +65,6 @@ func (p PlanetScaleEdgeDatabase) checkEdgePassword(ctx context.Context, psc Plan return nil } -func (p PlanetScaleEdgeDatabase) DiscoverSchema(ctx context.Context, psc PlanetScaleSource) (Catalog, error) { - var c Catalog - - tables, err := p.Mysql.GetTableNames(ctx, psc) - if err != nil { - return c, errors.Wrap(err, "Unable to query database for schema") - } - - for _, tableName := range tables { - stream, err := p.getStreamForTable(ctx, psc, tableName) - if err != nil { - return c, errors.Wrapf(err, "unable to get stream for table %v", tableName) - } - c.Streams = append(c.Streams, stream) - } - return c, nil -} - -func (p PlanetScaleEdgeDatabase) getStreamForTable(ctx context.Context, psc PlanetScaleSource, tableName string) (Stream, error) { - schema := StreamSchema{ - Type: "object", - Properties: map[string]PropertyType{}, - } - stream := Stream{ - Name: tableName, - Schema: schema, - SupportedSyncModes: []string{"full_refresh", "incremental"}, - Namespace: psc.Database, - } - - var err error - 
stream.Schema.Properties, err = p.Mysql.GetTableSchema(ctx, psc, tableName) - if err != nil { - return stream, errors.Wrapf(err, "Unable to get column names & types for table %v", tableName) - } - - // need this otherwise Airbyte will fail schema discovery for views - // without primary keys. - stream.PrimaryKeys = [][]string{} - stream.DefaultCursorFields = []string{} - - primaryKeys, err := p.Mysql.GetTablePrimaryKeys(ctx, psc, tableName) - if err != nil { - return stream, errors.Wrapf(err, "unable to iterate primary keys for table %s", tableName) - } - for _, key := range primaryKeys { - stream.PrimaryKeys = append(stream.PrimaryKeys, []string{key}) - } - - // pick the last key field as the default cursor field. - if len(primaryKeys) > 0 { - stream.DefaultCursorFields = append(stream.DefaultCursorFields, primaryKeys[len(primaryKeys)-1]) - } - - stream.SourceDefinedCursor = true - return stream, nil -} - // Convert columnType to Airbyte type. func getJsonSchemaType(mysqlType string, treatTinyIntAsBoolean bool) PropertyType { // Support custom airbyte types documented here : diff --git a/cmd/internal/schema_builder.go b/cmd/internal/schema_builder.go index a47b817..4403dd1 100644 --- a/cmd/internal/schema_builder.go +++ b/cmd/internal/schema_builder.go @@ -14,7 +14,11 @@ var gcTableNameRegexp = regexp.MustCompile(gCTableNameExpression) type SchemaBuilder struct { catalog *Catalog - streams map[string]map[string]Stream + streams map[string]map[string]*Stream +} + +func NewSchemaBuilder() lib.SchemaBuilder { + return &SchemaBuilder{} } func (sb *SchemaBuilder) OnKeyspace(_ string) { @@ -32,15 +36,19 @@ func (sb *SchemaBuilder) OnTable(keyspaceName, tableName string) { Properties: map[string]PropertyType{}, } - stream := Stream{ + stream := &Stream{ Name: tableName, Schema: schema, SupportedSyncModes: []string{"full_refresh", "incremental"}, Namespace: keyspaceName, } + if sb.streams == nil { + sb.streams = make(map[string]map[string]*Stream) + } + if _, ok := 
sb.streams[keyspaceName]; !ok { - sb.streams[keyspaceName] = make(map[string]Stream) + sb.streams[keyspaceName] = make(map[string]*Stream) } sb.streams[keyspaceName][tableName] = stream @@ -70,7 +78,13 @@ func (sb *SchemaBuilder) OnColumns(keyspaceName, tableName string, columns []lib } func (sb *SchemaBuilder) GetCatalog() Catalog { - return *sb.catalog + c := Catalog{} + for _, keyspace := range sb.streams { + for _, table := range keyspace { + c.Streams = append(c.Streams, *table) + } + } + return c } // Convert columnType to Airbyte type. From cf14b3849df9da8ed0c05ecce658f81d4267b719 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Thu, 4 Jan 2024 14:56:09 -0600 Subject: [PATCH 05/13] remove unused code --- cmd/airbyte-source/read.go | 2 +- cmd/internal/planetscale_edge_database.go | 28 -------- cmd/internal/planetscale_edge_mysql.go | 88 ----------------------- 3 files changed, 1 insertion(+), 117 deletions(-) diff --git a/cmd/airbyte-source/read.go b/cmd/airbyte-source/read.go index 77c50fa..cb82811 100644 --- a/cmd/airbyte-source/read.go +++ b/cmd/airbyte-source/read.go @@ -55,7 +55,7 @@ func ReadCommand(ch *Helper) *cobra.Command { } }() - cs, err := checkConnectionStatus(ch.Database, psc) + cs, err := checkConnectionStatus(psc) if err != nil { ch.Logger.ConnectionStatus(cs) return diff --git a/cmd/internal/planetscale_edge_database.go b/cmd/internal/planetscale_edge_database.go index 7a870df..574d1b0 100644 --- a/cmd/internal/planetscale_edge_database.go +++ b/cmd/internal/planetscale_edge_database.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "io" - "net/http" "strings" "time" @@ -38,33 +37,6 @@ type PlanetScaleEdgeDatabase struct { clientFn func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) } -func (p PlanetScaleEdgeDatabase) CanConnect(ctx context.Context, psc PlanetScaleSource) error { - if err := p.checkEdgePassword(ctx, psc); err != nil { - return errors.Wrap(err, "Unable to initialize Connect Session") - } - - return 
p.Mysql.PingContext(ctx, psc) -} - -func (p PlanetScaleEdgeDatabase) checkEdgePassword(ctx context.Context, psc PlanetScaleSource) error { - if !strings.HasSuffix(psc.Host, ".connect.psdb.cloud") { - return errors.New("This password is not connect-enabled, please ensure that your organization is enrolled in the Connect beta.") - } - reqCtx, cancel := context.WithTimeout(ctx, 2*time.Second) - defer cancel() - req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, fmt.Sprintf("https://%v", psc.Host), nil) - if err != nil { - return err - } - - _, err = http.DefaultClient.Do(req) - if err != nil { - return errors.New(fmt.Sprintf("The database %q, hosted at %q, is inaccessible from this process", psc.Database, psc.Host)) - } - - return nil -} - // Convert columnType to Airbyte type. func getJsonSchemaType(mysqlType string, treatTinyIntAsBoolean bool) PropertyType { // Support custom airbyte types documented here : diff --git a/cmd/internal/planetscale_edge_mysql.go b/cmd/internal/planetscale_edge_mysql.go index 8950dbc..9a633fa 100644 --- a/cmd/internal/planetscale_edge_mysql.go +++ b/cmd/internal/planetscale_edge_mysql.go @@ -3,7 +3,6 @@ package internal import ( "context" "database/sql" - "fmt" "github.com/pkg/errors" "strings" "time" @@ -21,9 +20,6 @@ type VitessTablet struct { } type PlanetScaleEdgeMysqlAccess interface { PingContext(context.Context, PlanetScaleSource) error - GetTableNames(context.Context, PlanetScaleSource) ([]string, error) - GetTableSchema(context.Context, PlanetScaleSource, string) (map[string]PropertyType, error) - GetTablePrimaryKeys(context.Context, PlanetScaleSource, string) ([]string, error) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) GetVitessTablets(ctx context.Context, psc PlanetScaleSource) ([]VitessTablet, error) Close() error @@ -108,87 +104,3 @@ func (p planetScaleEdgeMySQLAccess) PingContext(ctx context.Context, psc PlanetS defer cancel() return p.db.PingContext(ctx) } - -func (p 
planetScaleEdgeMySQLAccess) GetTableNames(ctx context.Context, psc PlanetScaleSource) ([]string, error) { - var tables []string - - tableNamesQR, err := p.db.Query(fmt.Sprintf("show tables from `%s`;", psc.Database)) - if err != nil { - return tables, errors.Wrap(err, "Unable to query database for schema") - } - - for tableNamesQR.Next() { - var name string - if err = tableNamesQR.Scan(&name); err != nil { - return tables, errors.Wrap(err, "unable to get table names") - } - - tables = append(tables, name) - } - - if err := tableNamesQR.Err(); err != nil { - return tables, errors.Wrap(err, "unable to iterate table rows") - } - - return tables, err -} - -func (p planetScaleEdgeMySQLAccess) GetTableSchema(ctx context.Context, psc PlanetScaleSource, tableName string) (map[string]PropertyType, error) { - properties := map[string]PropertyType{} - - columnNamesQR, err := p.db.QueryContext( - ctx, - "select column_name, column_type from information_schema.columns where table_name=? AND table_schema=?;", - tableName, psc.Database, - ) - if err != nil { - return properties, errors.Wrapf(err, "Unable to get column names & types for table %v", tableName) - } - - for columnNamesQR.Next() { - var ( - name string - columnType string - ) - if err = columnNamesQR.Scan(&name, &columnType); err != nil { - return properties, errors.Wrapf(err, "Unable to scan row for column names & types of table %v", tableName) - } - - properties[name] = getJsonSchemaType(columnType, !psc.Options.DoNotTreatTinyIntAsBoolean) - } - - if err := columnNamesQR.Err(); err != nil { - return properties, errors.Wrapf(err, "unable to iterate columns for table %s", tableName) - } - - return properties, nil -} - -func (p planetScaleEdgeMySQLAccess) GetTablePrimaryKeys(ctx context.Context, psc PlanetScaleSource, tableName string) ([]string, error) { - var primaryKeys []string - - primaryKeysQR, err := p.db.QueryContext( - ctx, - "select column_name from information_schema.columns where table_schema=? 
AND table_name=? AND column_key='PRI';", - psc.Database, tableName, - ) - - if err != nil { - return primaryKeys, errors.Wrapf(err, "Unable to scan row for primary keys of table %v", tableName) - } - - for primaryKeysQR.Next() { - var name string - if err = primaryKeysQR.Scan(&name); err != nil { - return primaryKeys, errors.Wrapf(err, "Unable to scan row for primary keys of table %v", tableName) - } - - primaryKeys = append(primaryKeys, name) - } - - if err := primaryKeysQR.Err(); err != nil { - return primaryKeys, errors.Wrapf(err, "unable to iterate primary keys for table %s", tableName) - } - - return primaryKeys, nil -} From 3985276a57ae8e4f3d386282b18620cbaf8139d6 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Thu, 4 Jan 2024 15:11:37 -0600 Subject: [PATCH 06/13] move test for detecting airbyte type --- cmd/internal/planetscale_edge_database.go | 1 - .../planetscale_edge_database_test.go | 128 ----------------- cmd/internal/schema_builder_test.go | 136 ++++++++++++++++++ 3 files changed, 136 insertions(+), 129 deletions(-) create mode 100644 cmd/internal/schema_builder_test.go diff --git a/cmd/internal/planetscale_edge_database.go b/cmd/internal/planetscale_edge_database.go index 574d1b0..9c3ec00 100644 --- a/cmd/internal/planetscale_edge_database.go +++ b/cmd/internal/planetscale_edge_database.go @@ -22,7 +22,6 @@ import ( // PlanetScaleDatabase is a general purpose interface // that defines all the data access methods needed for the PlanetScale Airbyte source to function. 
type PlanetScaleDatabase interface { - CanConnect(ctx context.Context, ps PlanetScaleSource) error ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) Read(ctx context.Context, w io.Writer, ps PlanetScaleSource, s ConfiguredStream, tc *psdbconnect.TableCursor) (*SerializedCursor, error) Close() error diff --git a/cmd/internal/planetscale_edge_database_test.go b/cmd/internal/planetscale_edge_database_test.go index b492148..57e7549 100644 --- a/cmd/internal/planetscale_edge_database_test.go +++ b/cmd/internal/planetscale_edge_database_test.go @@ -202,134 +202,6 @@ func TestRead_CanPickReplicaForShardedKeyspaces(t *testing.T) { assert.False(t, tma.GetVitessTabletsFnInvoked) } -func TestDiscover_CanPickRightAirbyteType(t *testing.T) { - var tests = []struct { - MysqlType string - JSONSchemaType string - AirbyteType string - TreatTinyIntAsBoolean bool - }{ - { - MysqlType: "int(11)", - JSONSchemaType: "number", - AirbyteType: "integer", - }, - { - MysqlType: "smallint(4)", - JSONSchemaType: "number", - AirbyteType: "integer", - }, - { - MysqlType: "mediumint(8)", - JSONSchemaType: "number", - AirbyteType: "integer", - }, - { - MysqlType: "tinyint", - JSONSchemaType: "number", - AirbyteType: "integer", - TreatTinyIntAsBoolean: true, - }, - { - MysqlType: "tinyint(1)", - JSONSchemaType: "boolean", - AirbyteType: "", - TreatTinyIntAsBoolean: true, - }, - { - MysqlType: "tinyint(1) unsigned", - JSONSchemaType: "boolean", - AirbyteType: "", - TreatTinyIntAsBoolean: true, - }, - { - MysqlType: "tinyint(1)", - JSONSchemaType: "number", - AirbyteType: "integer", - TreatTinyIntAsBoolean: false, - }, - { - MysqlType: "tinyint(1) unsigned", - JSONSchemaType: "number", - AirbyteType: "integer", - TreatTinyIntAsBoolean: false, - }, - { - MysqlType: "bigint(16)", - JSONSchemaType: "number", - AirbyteType: "integer", - }, - { - MysqlType: "bigint unsigned", - JSONSchemaType: "number", - AirbyteType: "integer", - }, - { - MysqlType: "bigint zerofill", - 
JSONSchemaType: "number", - AirbyteType: "integer", - }, - { - MysqlType: "datetime", - JSONSchemaType: "string", - AirbyteType: "timestamp_without_timezone", - }, - { - MysqlType: "datetime(6)", - JSONSchemaType: "string", - AirbyteType: "timestamp_without_timezone", - }, - { - MysqlType: "time", - JSONSchemaType: "string", - AirbyteType: "time_without_timezone", - }, - { - MysqlType: "time(6)", - JSONSchemaType: "string", - AirbyteType: "time_without_timezone", - }, - { - MysqlType: "date", - JSONSchemaType: "string", - AirbyteType: "date", - }, - { - MysqlType: "text", - JSONSchemaType: "string", - AirbyteType: "", - }, - { - MysqlType: "varchar(256)", - JSONSchemaType: "string", - AirbyteType: "", - }, - { - MysqlType: "decimal(12,5)", - JSONSchemaType: "number", - AirbyteType: "", - }, - { - MysqlType: "double", - JSONSchemaType: "number", - AirbyteType: "", - }, - { - MysqlType: "float(30)", - JSONSchemaType: "number", - AirbyteType: "", - }, - } - - for _, typeTest := range tests { - - t.Run(fmt.Sprintf("mysql_type_%v", typeTest.MysqlType), func(t *testing.T) { - p := getJsonSchemaType(typeTest.MysqlType, typeTest.TreatTinyIntAsBoolean) - assert.Equal(t, typeTest.AirbyteType, p.AirbyteType) - assert.Equal(t, typeTest.JSONSchemaType, p.Type) - }) - } -} func TestRead_CanPickPrimaryForUnshardedKeyspaces(t *testing.T) { tma := getTestMysqlAccess() b := bytes.NewBufferString("") diff --git a/cmd/internal/schema_builder_test.go b/cmd/internal/schema_builder_test.go new file mode 100644 index 0000000..9cb6335 --- /dev/null +++ b/cmd/internal/schema_builder_test.go @@ -0,0 +1,136 @@ +package internal + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestSchemaBuilder_CanPickRightAirbyteType(t *testing.T) { + var tests = []struct { + MysqlType string + JSONSchemaType string + AirbyteType string + TreatTinyIntAsBoolean bool + }{ + { + MysqlType: "int(11)", + JSONSchemaType: "number", + AirbyteType: "integer", + }, + { + MysqlType: 
"smallint(4)", + JSONSchemaType: "number", + AirbyteType: "integer", + }, + { + MysqlType: "mediumint(8)", + JSONSchemaType: "number", + AirbyteType: "integer", + }, + { + MysqlType: "tinyint", + JSONSchemaType: "number", + AirbyteType: "integer", + TreatTinyIntAsBoolean: true, + }, + { + MysqlType: "tinyint(1)", + JSONSchemaType: "boolean", + AirbyteType: "", + TreatTinyIntAsBoolean: true, + }, + { + MysqlType: "tinyint(1) unsigned", + JSONSchemaType: "boolean", + AirbyteType: "", + TreatTinyIntAsBoolean: true, + }, + { + MysqlType: "tinyint(1)", + JSONSchemaType: "number", + AirbyteType: "integer", + TreatTinyIntAsBoolean: false, + }, + { + MysqlType: "tinyint(1) unsigned", + JSONSchemaType: "number", + AirbyteType: "integer", + TreatTinyIntAsBoolean: false, + }, + { + MysqlType: "bigint(16)", + JSONSchemaType: "number", + AirbyteType: "integer", + }, + { + MysqlType: "bigint unsigned", + JSONSchemaType: "number", + AirbyteType: "integer", + }, + { + MysqlType: "bigint zerofill", + JSONSchemaType: "number", + AirbyteType: "integer", + }, + { + MysqlType: "datetime", + JSONSchemaType: "string", + AirbyteType: "timestamp_without_timezone", + }, + { + MysqlType: "datetime(6)", + JSONSchemaType: "string", + AirbyteType: "timestamp_without_timezone", + }, + { + MysqlType: "time", + JSONSchemaType: "string", + AirbyteType: "time_without_timezone", + }, + { + MysqlType: "time(6)", + JSONSchemaType: "string", + AirbyteType: "time_without_timezone", + }, + { + MysqlType: "date", + JSONSchemaType: "string", + AirbyteType: "date", + }, + { + MysqlType: "text", + JSONSchemaType: "string", + AirbyteType: "", + }, + { + MysqlType: "varchar(256)", + JSONSchemaType: "string", + AirbyteType: "", + }, + { + MysqlType: "decimal(12,5)", + JSONSchemaType: "number", + AirbyteType: "", + }, + { + MysqlType: "double", + JSONSchemaType: "number", + AirbyteType: "", + }, + { + MysqlType: "float(30)", + JSONSchemaType: "number", + AirbyteType: "", + }, + } + + for _, typeTest := range 
tests { + + t.Run(fmt.Sprintf("mysql_type_%v", typeTest.MysqlType), func(t *testing.T) { + p := getAirbyteDataType(typeTest.MysqlType, typeTest.TreatTinyIntAsBoolean) + assert.Equal(t, typeTest.AirbyteType, p.AirbyteType) + assert.Equal(t, typeTest.JSONSchemaType, p.Type) + }) + } +} From 9c66ceaf22f973d5de92d9a781506dafb027b849 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Thu, 4 Jan 2024 21:54:42 -0600 Subject: [PATCH 07/13] use Read method from connectsdk --- cmd/airbyte-source/check.go | 29 +- cmd/airbyte-source/discover.go | 8 +- cmd/airbyte-source/helper.go | 27 +- cmd/airbyte-source/read.go | 57 +- cmd/internal/logger.go | 11 + cmd/internal/planetscale_edge_database.go | 302 --------- .../planetscale_edge_database_test.go | 593 ------------------ cmd/internal/planetscale_edge_mysql.go | 106 ---- 8 files changed, 77 insertions(+), 1056 deletions(-) delete mode 100644 cmd/internal/planetscale_edge_database.go delete mode 100644 cmd/internal/planetscale_edge_database_test.go delete mode 100644 cmd/internal/planetscale_edge_mysql.go diff --git a/cmd/airbyte-source/check.go b/cmd/airbyte-source/check.go index 6f34724..97542fb 100644 --- a/cmd/airbyte-source/check.go +++ b/cmd/airbyte-source/check.go @@ -44,13 +44,7 @@ func CheckCommand(ch *Helper) *cobra.Command { return } - defer func() { - if err := ch.Database.Close(); err != nil { - fmt.Fprintf(cmd.OutOrStdout(), "Unable to close connection to PlanetScale Database, failed with %v", err) - } - }() - - cs, _ := checkConnectionStatus(psc) + cs, _ := checkConnectionStatus(ch.ConnectClient, ch.Source) ch.Logger.ConnectionStatus(cs) }, } @@ -71,24 +65,9 @@ func parseSource(reader FileReader, configFilePath string) (internal.PlanetScale return psc, nil } -func checkConnectionStatus(psc internal.PlanetScaleSource) (internal.ConnectionStatus, error) { - libpsc := lib.PlanetScaleSource{ - UseReplica: true, - Username: psc.Username, - Database: psc.Database, - Host: psc.Host, - Password: psc.Password, - 
TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, - } - mc, err := lib.NewMySQL(&libpsc) - if err != nil { - return internal.ConnectionStatus{ - Status: "FAILED", - Message: fmt.Sprintf("Unable to connect to PlanetScale database %v at host %v with username %v. Failed with \n %v", psc.Database, psc.Host, psc.Username, err), - }, err - } - cc := lib.NewConnectClient(&mc) - if err := cc.CanConnect(context.Background(), libpsc); err != nil { +func checkConnectionStatus(connectClient lib.ConnectClient, psc lib.PlanetScaleSource) (internal.ConnectionStatus, error) { + + if err := connectClient.CanConnect(context.Background(), psc); err != nil { return internal.ConnectionStatus{ Status: "FAILED", Message: fmt.Sprintf("Unable to connect to PlanetScale database %v at host %v with username %v. Failed with \n %v", psc.Database, psc.Host, psc.Username, err), diff --git a/cmd/airbyte-source/discover.go b/cmd/airbyte-source/discover.go index ccde24e..4d88484 100644 --- a/cmd/airbyte-source/discover.go +++ b/cmd/airbyte-source/discover.go @@ -41,18 +41,12 @@ func DiscoverCommand(ch *Helper) *cobra.Command { return } - cs, err := checkConnectionStatus(psc) + cs, err := checkConnectionStatus(ch.ConnectClient, ch.Source) if err != nil { ch.Logger.ConnectionStatus(cs) return } - defer func() { - if err := ch.Database.Close(); err != nil { - fmt.Fprintf(cmd.OutOrStdout(), "Unable to close connection to PlanetScale Database, failed with %v", err) - } - }() - libpsc := lib.PlanetScaleSource{ UseReplica: true, Username: psc.Username, diff --git a/cmd/airbyte-source/helper.go b/cmd/airbyte-source/helper.go index 3e01f9f..29fc469 100644 --- a/cmd/airbyte-source/helper.go +++ b/cmd/airbyte-source/helper.go @@ -2,14 +2,17 @@ package airbyte_source import ( "github.com/planetscale/airbyte-source/cmd/internal" + "github.com/planetscale/connectsdk/lib" "io" "os" ) type Helper struct { - Database internal.PlanetScaleDatabase - FileReader FileReader - Logger internal.AirbyteLogger + 
MysqlClient lib.MysqlClient + ConnectClient lib.ConnectClient + Source lib.PlanetScaleSource + FileReader FileReader + Logger internal.AirbyteLogger } type FileReader interface { @@ -31,18 +34,20 @@ func DefaultHelper(w io.Writer) *Helper { } func (h *Helper) EnsureDB(psc internal.PlanetScaleSource) error { - if h.Database != nil { - return nil + h.Source = lib.PlanetScaleSource{ + UseReplica: true, + Username: psc.Username, + Database: psc.Database, + Host: psc.Host, + Password: psc.Password, + TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, } - - mysql, err := internal.NewMySQL(&psc) + var err error + h.MysqlClient, err = lib.NewMySQL(&h.Source) if err != nil { return err } - h.Database = internal.PlanetScaleEdgeDatabase{ - Logger: h.Logger, - Mysql: mysql, - } + h.ConnectClient = lib.NewConnectClient(&h.MysqlClient) return nil } diff --git a/cmd/airbyte-source/read.go b/cmd/airbyte-source/read.go index cb82811..80f94d7 100644 --- a/cmd/airbyte-source/read.go +++ b/cmd/airbyte-source/read.go @@ -4,7 +4,10 @@ import ( "context" "encoding/json" "fmt" + psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "github.com/planetscale/connectsdk/lib" "os" + "vitess.io/vitess/go/sqltypes" "github.com/planetscale/airbyte-source/cmd/internal" "github.com/spf13/cobra" @@ -49,13 +52,7 @@ func ReadCommand(ch *Helper) *cobra.Command { return } - defer func() { - if err := ch.Database.Close(); err != nil { - fmt.Fprintf(cmd.OutOrStdout(), "Unable to close connection to PlanetScale Database, failed with %v", err) - } - }() - - cs, err := checkConnectionStatus(psc) + cs, err := checkConnectionStatus(ch.ConnectClient, ch.Source) if err != nil { ch.Logger.ConnectionStatus(cs) return @@ -81,7 +78,8 @@ func ReadCommand(ch *Helper) *cobra.Command { } state = string(b) } - shards, err := ch.Database.ListShards(context.Background(), psc) + + shards, err := ch.ConnectClient.ListShards(context.Background(), ch.Source) if err != nil { 
ch.Logger.Error(fmt.Sprintf("Unable to list shards : %v", err)) os.Exit(1) @@ -93,6 +91,22 @@ func ReadCommand(ch *Helper) *cobra.Command { os.Exit(1) } + libpsc := lib.PlanetScaleSource{ + UseReplica: false, + Username: psc.Username, + Database: psc.Database, + Host: psc.Host, + Password: psc.Password, + TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, + } + mc, err := lib.NewMySQL(&libpsc) + if err != nil { + ch.Logger.Error(fmt.Sprintf("Unable to read state : %v", err)) + os.Exit(1) + } + cc := lib.NewConnectClient(&mc) + allColumns := []string{} + for _, table := range catalog.Streams { keyspaceOrDatabase := table.Stream.Namespace if keyspaceOrDatabase == "" { @@ -105,24 +119,43 @@ func ReadCommand(ch *Helper) *cobra.Command { os.Exit(1) } - for shardName, shardState := range streamState.Shards { + for _, shardState := range streamState.Shards { tc, err := shardState.SerializedCursorToTableCursor(table) if err != nil { ch.Logger.Error(fmt.Sprintf("invalid cursor for stream %v, failed with [%v]", streamStateKey, err)) os.Exit(1) } - sc, err := ch.Database.Read(context.Background(), cmd.OutOrStdout(), psc, table, tc) + onResult := func(qr *sqltypes.Result, _ lib.Operation) error { + data := internal.QueryResultToRecords(qr) + + for _, record := range data { + ch.Logger.Record(keyspaceOrDatabase, table.Stream.Name, record) + } + return nil + } + + onUpdate := func(*lib.UpdatedRow) error { + return nil + } + onCursor := func(*psdbconnect.TableCursor) error { + //syncState.Streams[streamStateKey].Shards[shardName] = sc + ch.Logger.Flush() + ch.Logger.State(syncState) + return nil + } + + sc, err := cc.Read(context.Background(), ch.Logger, libpsc, table.Stream.Name, allColumns, tc, onResult, onCursor, onUpdate) if err != nil { ch.Logger.Error(err.Error()) os.Exit(1) } - if sc != nil { // if we get any new state, we assign it here. // otherwise, the older state is round-tripped back to Airbyte. 
- syncState.Streams[streamStateKey].Shards[shardName] = sc + //syncState.Streams[streamStateKey].Shards[shardName] = sc } + ch.Logger.Flush() ch.Logger.State(syncState) } } diff --git a/cmd/internal/logger.go b/cmd/internal/logger.go index 49f060b..4f6ab06 100644 --- a/cmd/internal/logger.go +++ b/cmd/internal/logger.go @@ -14,6 +14,7 @@ type AirbyteLogger interface { Flush() State(syncState SyncState) Error(error string) + Info(message string) } const MaxBatchSize = 10000 @@ -91,6 +92,16 @@ func (a *airbyteLogger) Error(error string) { }) } +func (a *airbyteLogger) Info(message string) { + a.recordEncoder.Encode(AirbyteMessage{ + Type: LOG, + Log: &AirbyteLogMessage{ + Level: LOGLEVEL_INFO, + Message: message, + }, + }) +} + func (a *airbyteLogger) ConnectionStatus(status ConnectionStatus) { a.recordEncoder.Encode(AirbyteMessage{ Type: CONNECTION_STATUS, diff --git a/cmd/internal/planetscale_edge_database.go b/cmd/internal/planetscale_edge_database.go deleted file mode 100644 index 9c3ec00..0000000 --- a/cmd/internal/planetscale_edge_database.go +++ /dev/null @@ -1,302 +0,0 @@ -package internal - -import ( - "context" - "fmt" - "io" - "strings" - "time" - - "github.com/pkg/errors" - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/planetscale/psdb/auth" - grpcclient "github.com/planetscale/psdb/core/pool" - clientoptions "github.com/planetscale/psdb/core/pool/options" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - "vitess.io/vitess/go/sqltypes" - _ "vitess.io/vitess/go/vt/vtctl/grpcvtctlclient" - _ "vitess.io/vitess/go/vt/vtgate/grpcvtgateconn" -) - -// PlanetScaleDatabase is a general purpose interface -// that defines all the data access methods needed for the PlanetScale Airbyte source to function. 
-type PlanetScaleDatabase interface { - ListShards(ctx context.Context, ps PlanetScaleSource) ([]string, error) - Read(ctx context.Context, w io.Writer, ps PlanetScaleSource, s ConfiguredStream, tc *psdbconnect.TableCursor) (*SerializedCursor, error) - Close() error -} - -// PlanetScaleEdgeDatabase is an implementation of the PlanetScaleDatabase interface defined above. -// It uses the mysql interface provided by PlanetScale for all schema/shard/tablet discovery and -// the grpc API for incrementally syncing rows from PlanetScale. -type PlanetScaleEdgeDatabase struct { - Logger AirbyteLogger - Mysql PlanetScaleEdgeMysqlAccess - clientFn func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) -} - -// Convert columnType to Airbyte type. -func getJsonSchemaType(mysqlType string, treatTinyIntAsBoolean bool) PropertyType { - // Support custom airbyte types documented here : - // https://docs.airbyte.com/understanding-airbyte/supported-data-types/#the-types - switch { - case strings.HasPrefix(mysqlType, "tinyint(1)"): - if treatTinyIntAsBoolean { - return PropertyType{Type: "boolean"} - } - return PropertyType{Type: "number", AirbyteType: "integer"} - case strings.HasPrefix(mysqlType, "int"), strings.HasPrefix(mysqlType, "smallint"), strings.HasPrefix(mysqlType, "mediumint"), strings.HasPrefix(mysqlType, "bigint"), strings.HasPrefix(mysqlType, "tinyint"): - return PropertyType{Type: "number", AirbyteType: "integer"} - case strings.HasPrefix(mysqlType, "decimal"), strings.HasPrefix(mysqlType, "double"), strings.HasPrefix(mysqlType, "float"): - return PropertyType{Type: "number"} - case strings.HasPrefix(mysqlType, "datetime"), strings.HasPrefix(mysqlType, "timestamp"): - return PropertyType{Type: "string", CustomFormat: "date-time", AirbyteType: "timestamp_without_timezone"} - case strings.HasPrefix(mysqlType, "date"): - return PropertyType{Type: "string", CustomFormat: "date", AirbyteType: "date"} - case strings.HasPrefix(mysqlType, "time"): - 
return PropertyType{Type: "string", CustomFormat: "time", AirbyteType: "time_without_timezone"} - default: - return PropertyType{Type: "string"} - } -} - -func (p PlanetScaleEdgeDatabase) Close() error { - return p.Mysql.Close() -} - -func (p PlanetScaleEdgeDatabase) ListShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) { - return p.Mysql.GetVitessShards(ctx, psc) -} - -// Read streams rows from a table given a starting cursor. -// 1. We will get the latest vgtid for a given table in a shard when a sync session starts. -// 2. This latest vgtid is now the stopping point for this sync session. -// 3. Ask vstream to stream from the last known vgtid -// 4. When we reach the stopping point, read all rows available at this vgtid -// 5. End the stream when (a) a vgtid newer than latest vgtid is encountered or (b) the timeout kicks in. -func (p PlanetScaleEdgeDatabase) Read(ctx context.Context, w io.Writer, ps PlanetScaleSource, s ConfiguredStream, lastKnownPosition *psdbconnect.TableCursor) (*SerializedCursor, error) { - var ( - err error - sErr error - currentSerializedCursor *SerializedCursor - ) - - tabletType := psdbconnect.TabletType_primary - if ps.UseReplica { - tabletType = psdbconnect.TabletType_replica - } - - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf("Syncing from tabletType \"%v\"", TabletTypeToString(tabletType))) - - currentPosition := lastKnownPosition - table := s.Stream - readDuration := 1 * time.Minute - preamble := fmt.Sprintf("[%v:%v:%v shard : %v] ", table.Namespace, TabletTypeToString(tabletType), table.Name, currentPosition.Shard) - for { - p.Logger.Log(LOGLEVEL_INFO, preamble+"peeking to see if there's any new rows") - latestCursorPosition, lcErr := p.getLatestCursorPosition(ctx, currentPosition.Shard, currentPosition.Keyspace, table, ps, tabletType) - if lcErr != nil { - return currentSerializedCursor, errors.Wrap(err, "Unable to get latest cursor position") - } - - // the current vgtid is the same as the last synced vgtid, no new 
rows. - if latestCursorPosition == currentPosition.Position { - p.Logger.Log(LOGLEVEL_INFO, preamble+"no new rows found, exiting") - return TableCursorToSerializedCursor(currentPosition) - } - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf("new rows found, syncing rows for %v", readDuration)) - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf(preamble+"syncing rows with cursor [%v]", currentPosition)) - - currentPosition, err = p.sync(ctx, currentPosition, latestCursorPosition, table, ps, tabletType, readDuration) - if currentPosition.Position != "" { - currentSerializedCursor, sErr = TableCursorToSerializedCursor(currentPosition) - if sErr != nil { - // if we failed to serialize here, we should bail. - return currentSerializedCursor, errors.Wrap(sErr, "unable to serialize current position") - } - } - if err != nil { - if s, ok := status.FromError(err); ok { - // if the error is anything other than server timeout, keep going - if s.Code() != codes.DeadlineExceeded { - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf("%v Got error [%v], Returning with cursor :[%v] after server timeout", preamble, s.Code(), currentPosition)) - return currentSerializedCursor, nil - } else { - p.Logger.Log(LOGLEVEL_INFO, preamble+"Continuing with cursor after server timeout") - } - } else if errors.Is(err, io.EOF) { - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf("%vFinished reading all rows for table [%v]", preamble, table.Name)) - return currentSerializedCursor, nil - } else { - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf("non-grpc error [%v]]", err)) - return currentSerializedCursor, err - } - } - } -} - -func (p PlanetScaleEdgeDatabase) sync(ctx context.Context, tc *psdbconnect.TableCursor, stopPosition string, s Stream, ps PlanetScaleSource, tabletType psdbconnect.TabletType, readDuration time.Duration) (*psdbconnect.TableCursor, error) { - defer p.Logger.Flush() - ctx, cancel := context.WithTimeout(ctx, readDuration) - defer cancel() - - var ( - err error - client psdbconnect.ConnectClient - ) - - if p.clientFn == 
nil { - conn, err := grpcclient.Dial(ctx, ps.Host, - clientoptions.WithDefaultTLSConfig(), - clientoptions.WithCompression(true), - clientoptions.WithConnectionPool(1), - clientoptions.WithExtraCallOption( - auth.NewBasicAuth(ps.Username, ps.Password).CallOption(), - ), - ) - if err != nil { - return tc, err - } - defer conn.Close() - client = psdbconnect.NewConnectClient(conn) - } else { - client, err = p.clientFn(ctx, ps) - if err != nil { - return tc, err - } - } - - if tc.LastKnownPk != nil { - tc.Position = "" - } - - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf("Syncing with cursor position : [%v], using last known PK : %v, stop cursor is : [%v]", tc.Position, tc.LastKnownPk != nil, stopPosition)) - - sReq := &psdbconnect.SyncRequest{ - TableName: s.Name, - Cursor: tc, - TabletType: tabletType, - Cells: []string{"planetscale_operator_default"}, - } - p.Logger.Log(LOGLEVEL_INFO, fmt.Sprintf("DEBUG: SyncRequest.Cells = %v", sReq.GetCells())) - - c, err := client.Sync(ctx, sReq) - if err != nil { - return tc, err - } - - keyspaceOrDatabase := s.Namespace - if keyspaceOrDatabase == "" { - keyspaceOrDatabase = ps.Database - } - - // stop when we've reached the well known stop position for this sync session. - watchForVgGtidChange := false - - for { - - res, err := c.Recv() - if err != nil { - return tc, err - } - - if res.Cursor != nil { - tc = res.Cursor - } - - // Because of the ordering of events in a vstream - // we receive the vgtid event first and then the rows. - // the vgtid event might repeat, but they're ordered. - // so we once we reach the desired stop vgtid, we stop the sync session - // if we get a newer vgtid. 
- watchForVgGtidChange = watchForVgGtidChange || tc.Position == stopPosition - - if len(res.Result) > 0 { - for _, result := range res.Result { - qr := sqltypes.Proto3ToResult(result) - for _, row := range qr.Rows { - sqlResult := &sqltypes.Result{ - Fields: result.Fields, - } - sqlResult.Rows = append(sqlResult.Rows, row) - // print AirbyteRecord messages to stdout here. - p.printQueryResult(sqlResult, keyspaceOrDatabase, s.Name) - } - } - } - - if watchForVgGtidChange && tc.Position != stopPosition { - return tc, io.EOF - } - } -} - -func (p PlanetScaleEdgeDatabase) getLatestCursorPosition(ctx context.Context, shard, keyspace string, s Stream, ps PlanetScaleSource, tabletType psdbconnect.TabletType) (string, error) { - defer p.Logger.Flush() - timeout := 45 * time.Second - ctx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - var ( - err error - client psdbconnect.ConnectClient - ) - - if p.clientFn == nil { - conn, err := grpcclient.Dial(ctx, ps.Host, - clientoptions.WithDefaultTLSConfig(), - clientoptions.WithCompression(true), - clientoptions.WithConnectionPool(1), - clientoptions.WithExtraCallOption( - auth.NewBasicAuth(ps.Username, ps.Password).CallOption(), - ), - ) - if err != nil { - return "", err - } - defer conn.Close() - client = psdbconnect.NewConnectClient(conn) - } else { - client, err = p.clientFn(ctx, ps) - if err != nil { - return "", err - } - } - - sReq := &psdbconnect.SyncRequest{ - TableName: s.Name, - Cursor: &psdbconnect.TableCursor{ - Shard: shard, - Keyspace: keyspace, - Position: "current", - }, - TabletType: tabletType, - Cells: []string{"planetscale_operator_default"}, - } - - c, err := client.Sync(ctx, sReq) - if err != nil { - return "", nil - } - - for { - res, err := c.Recv() - if err != nil { - return "", err - } - - if res.Cursor != nil { - return res.Cursor.Position, nil - } - } -} - -// printQueryResult will pretty-print an AirbyteRecordMessage to the logger. 
-// Copied from vtctl/query.go -func (p PlanetScaleEdgeDatabase) printQueryResult(qr *sqltypes.Result, tableNamespace, tableName string) { - data := QueryResultToRecords(qr) - - for _, record := range data { - p.Logger.Record(tableNamespace, tableName, record) - } -} diff --git a/cmd/internal/planetscale_edge_database_test.go b/cmd/internal/planetscale_edge_database_test.go deleted file mode 100644 index 57e7549..0000000 --- a/cmd/internal/planetscale_edge_database_test.go +++ /dev/null @@ -1,593 +0,0 @@ -package internal - -import ( - "bytes" - "context" - "fmt" - "os" - "testing" - - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/stretchr/testify/assert" - "google.golang.org/grpc" - "vitess.io/vitess/go/sqltypes" - "vitess.io/vitess/go/vt/proto/query" -) - -func TestRead_CanPeekBeforeRead(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - { - Cursor: tc, - }, - { - Cursor: tc, - }, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{} - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, cc.syncFnInvokedCount) - assert.False(t, tma.PingContextFnInvoked) - 
assert.False(t, tma.GetVitessTabletsFnInvoked) -} - -func TestRead_CanEarlyExitIfNoNewVGtidInPeek(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: tc}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{} - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, cc.syncFnInvokedCount) -} - -func TestRead_CanPickPrimaryForShardedKeyspaces(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "40-80", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: tc}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - assert.Contains(t, in.Cells, "planetscale_operator_default") - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps 
PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, cc.syncFnInvokedCount) - assert.False(t, tma.PingContextFnInvoked) - assert.False(t, tma.GetVitessTabletsFnInvoked) -} - -func TestRead_CanPickReplicaForShardedKeyspaces(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "40-80", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: tc}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_replica, in.TabletType) - assert.Contains(t, in.Cells, "planetscale_operator_default") - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - UseReplica: true, - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, cc.syncFnInvokedCount) - assert.False(t, tma.PingContextFnInvoked) - assert.False(t, tma.GetVitessTabletsFnInvoked) -} - -func 
TestRead_CanPickPrimaryForUnshardedKeyspaces(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - { - Cursor: tc, - }, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - assert.Contains(t, in.Cells, "planetscale_operator_default") - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, cc.syncFnInvokedCount) - assert.False(t, tma.PingContextFnInvoked) - assert.False(t, tma.GetVitessTabletsFnInvoked) -} - -func TestRead_CanPickReplicaForUnshardedKeyspaces(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - { - Cursor: tc, - }, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - 
assert.Equal(t, psdbconnect.TabletType_replica, in.TabletType) - assert.Contains(t, in.Cells, "planetscale_operator_default") - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - UseReplica: true, - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, cc.syncFnInvokedCount) - assert.False(t, tma.PingContextFnInvoked) - assert.False(t, tma.GetVitessTabletsFnInvoked) -} - -func TestRead_CanReturnOriginalCursorIfNoNewFound(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: tc}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(tc) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 1, 
cc.syncFnInvokedCount) -} - -func TestRead_CanReturnNewCursorIfNewFound(t *testing.T) { - tma := getTestMysqlAccess() - b := bytes.NewBufferString("") - ped := PlanetScaleEdgeDatabase{ - Logger: NewLogger(b), - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - newTC := &psdbconnect.TableCursor{ - Shard: "-", - Position: "I_AM_FARTHER_IN_THE_BINLOG", - Keyspace: "connect-test", - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: newTC}, - {Cursor: newTC}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - esc, err := TableCursorToSerializedCursor(newTC) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 2, cc.syncFnInvokedCount) -} - -func TestRead_CanStopAtWellKnownCursor(t *testing.T) { - tma := getTestMysqlAccess() - tal := testAirbyteLogger{} - ped := PlanetScaleEdgeDatabase{ - Logger: &tal, - Mysql: tma, - } - - numResponses := 10 - // when the client tries to get the "current" vgtid, - // we return the ante-penultimate element of the array. - currentVGtidPosition := (numResponses * 3) - 4 - // this is the next vgtid that should stop the sync session. 
- nextVGtidPosition := currentVGtidPosition + 1 - responses := make([]*psdbconnect.SyncResponse, 0, numResponses) - for i := 0; i < numResponses; i++ { - // this simulates multiple events being returned, for the same vgtid, from vstream - for x := 0; x < 3; x++ { - var result []*query.QueryResult - if x == 2 { - result = []*query.QueryResult{ - sqltypes.ResultToProto3(sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "pid|description", - "int64|varbinary"), - fmt.Sprintf("%v|keyboard", i+1), - fmt.Sprintf("%v|monitor", i+2), - )), - } - } - - vgtid := fmt.Sprintf("e4e20f06-e28f-11ec-8d20-8e7ac09cb64c:1-%v", i) - responses = append(responses, &psdbconnect.SyncResponse{ - Cursor: &psdbconnect.TableCursor{ - Shard: "-", - Keyspace: "connect-test", - Position: vgtid, - }, - Result: result, - }) - } - } - - syncClient := &connectSyncClientMock{ - syncResponses: responses, - } - - getCurrentVGtidClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - responses[currentVGtidPosition], - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - if in.Cursor.Position == "current" { - return getCurrentVGtidClient, nil - } - - return syncClient, nil - }, - } - - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "customers", - Namespace: "connect-test", - }, - } - - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, responses[0].Cursor) - assert.NoError(t, err) - // sync should start at the first vgtid - esc, err := TableCursorToSerializedCursor(responses[nextVGtidPosition].Cursor) - assert.NoError(t, err) - assert.Equal(t, esc, sc) - assert.Equal(t, 2, cc.syncFnInvokedCount) - - 
logLines := tal.logMessages[LOGLEVEL_INFO] - assert.Equal(t, "[connect-test:primary:customers shard : -] Finished reading all rows for table [customers]", logLines[len(logLines)-1]) - records := tal.records["connect-test.customers"] - assert.Equal(t, 2*(nextVGtidPosition/3), len(records)) -} - -func TestRead_CanLogResults(t *testing.T) { - tma := getTestMysqlAccess() - tal := testAirbyteLogger{} - ped := PlanetScaleEdgeDatabase{ - Logger: &tal, - Mysql: tma, - } - tc := &psdbconnect.TableCursor{ - Shard: "-", - Position: "THIS_IS_A_SHARD_GTID", - Keyspace: "connect-test", - } - newTC := &psdbconnect.TableCursor{ - Shard: "-", - Position: "I_AM_FARTHER_IN_THE_BINLOG", - Keyspace: "connect-test", - } - - result := []*query.QueryResult{ - sqltypes.ResultToProto3(sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "pid|description", - "int64|varbinary"), - "1|keyboard", - "2|monitor", - )), - } - - syncClient := &connectSyncClientMock{ - syncResponses: []*psdbconnect.SyncResponse{ - {Cursor: newTC, Result: result}, - {Cursor: newTC, Result: result}, - }, - } - - cc := clientConnectionMock{ - syncFn: func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - assert.Equal(t, psdbconnect.TabletType_primary, in.TabletType) - return syncClient, nil - }, - } - ped.clientFn = func(ctx context.Context, ps PlanetScaleSource) (psdbconnect.ConnectClient, error) { - return &cc, nil - } - ps := PlanetScaleSource{ - Database: "connect-test", - } - cs := ConfiguredStream{ - Stream: Stream{ - Name: "products", - Namespace: "connect-test", - }, - } - sc, err := ped.Read(context.Background(), os.Stdout, ps, cs, tc) - assert.NoError(t, err) - assert.NotNil(t, sc) - assert.Equal(t, 2, len(tal.records["connect-test.products"])) - records := tal.records["connect-test.products"] - keyboardFound := false - monitorFound := false - for _, r := range records { - id, err := r["pid"].(sqltypes.Value).ToInt64() - assert.NoError(t, err) 
- if id == 1 { - assert.False(t, keyboardFound, "should not find keyboard twice") - keyboardFound = true - assert.Equal(t, "keyboard", r["description"].(sqltypes.Value).ToString()) - } - - if id == 2 { - assert.False(t, monitorFound, "should not find monitor twice") - monitorFound = true - assert.Equal(t, "monitor", r["description"].(sqltypes.Value).ToString()) - } - } - assert.True(t, keyboardFound) - assert.True(t, monitorFound) -} - -func getTestMysqlAccess() *mysqlAccessMock { - tma := mysqlAccessMock{ - PingContextFn: func(ctx context.Context, source PlanetScaleSource) error { - return nil - }, - GetVitessTabletsFn: func(ctx context.Context, psc PlanetScaleSource) ([]VitessTablet, error) { - return []VitessTablet{ - { - Cell: "test_cell_primary", - Keyspace: "connect-test", - TabletType: TabletTypeToString(psdbconnect.TabletType_primary), - State: "SERVING", - }, - { - Cell: "test_cell_replica", - Keyspace: "connect-test", - TabletType: TabletTypeToString(psdbconnect.TabletType_replica), - State: "SERVING", - }, - }, nil - }, - } - return &tma -} diff --git a/cmd/internal/planetscale_edge_mysql.go b/cmd/internal/planetscale_edge_mysql.go deleted file mode 100644 index 9a633fa..0000000 --- a/cmd/internal/planetscale_edge_mysql.go +++ /dev/null @@ -1,106 +0,0 @@ -package internal - -import ( - "context" - "database/sql" - "github.com/pkg/errors" - "strings" - "time" -) - -type VitessTablet struct { - Cell string - Keyspace string - Shard string - TabletType string - State string - Alias string - Hostname string - PrimaryTermStartTime string -} -type PlanetScaleEdgeMysqlAccess interface { - PingContext(context.Context, PlanetScaleSource) error - GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) - GetVitessTablets(ctx context.Context, psc PlanetScaleSource) ([]VitessTablet, error) - Close() error -} - -func NewMySQL(psc *PlanetScaleSource) (PlanetScaleEdgeMysqlAccess, error) { - db, err := sql.Open("mysql", psc.DSN()) - if err != nil { 
- return nil, err - } - - return planetScaleEdgeMySQLAccess{ - db: db, - }, nil -} - -type planetScaleEdgeMySQLAccess struct { - db *sql.DB -} - -func (p planetScaleEdgeMySQLAccess) Close() error { - return p.db.Close() -} - -func (p planetScaleEdgeMySQLAccess) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) { - var shards []string - - // TODO: is there a prepared statement equivalent? - shardNamesQR, err := p.db.QueryContext( - ctx, - `show vitess_shards like "%`+psc.Database+`%";`, - ) - if err != nil { - return shards, errors.Wrap(err, "Unable to query database for shards") - } - - for shardNamesQR.Next() { - var name string - if err = shardNamesQR.Scan(&name); err != nil { - return shards, errors.Wrap(err, "unable to get shard names") - } - - shards = append(shards, strings.TrimPrefix(name, psc.Database+"/")) - } - - if err := shardNamesQR.Err(); err != nil { - return shards, errors.Wrapf(err, "unable to iterate shard names for %s", psc.Database) - } - return shards, nil -} - -func (p planetScaleEdgeMySQLAccess) GetVitessTablets(ctx context.Context, psc PlanetScaleSource) ([]VitessTablet, error) { - var tablets []VitessTablet - - tabletsQR, err := p.db.QueryContext(ctx, "Show vitess_tablets") - if err != nil { - return tablets, err - } - - for tabletsQR.Next() { - vt := VitessTablet{} - // output is of the form : - //aws_useast1c_5 connect-test - PRIMARY SERVING aws_useast1c_5-2797914161 10.200.131.217 2022-05-09T14:11:56Z - //aws_useast1c_5 connect-test - REPLICA SERVING aws_useast1c_5-1559247072 10.200.178.136 - //aws_useast1c_5 connect-test - PRIMARY SERVING aws_useast1c_5-2797914161 10.200.131.217 2022-05-09T14:11:56Z - //aws_useast1c_5 connect-test - REPLICA SERVING aws_useast1c_5-1559247072 10.200.178.136 - err := tabletsQR.Scan(&vt.Cell, &vt.Keyspace, &vt.Shard, &vt.TabletType, &vt.State, &vt.Alias, &vt.Hostname, &vt.PrimaryTermStartTime) - if err != nil { - return tablets, err - } - tablets = append(tablets, vt) - } - if 
err := tabletsQR.Err(); err != nil { - return tablets, errors.Wrapf(err, "unable to iterate tablets for %s", psc.Database) - } - return tablets, nil -} - -func (p planetScaleEdgeMySQLAccess) PingContext(ctx context.Context, psc PlanetScaleSource) error { - - ctx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - return p.db.PingContext(ctx) -} From 9a1e5b9233e8f84901e240281c4d7c3500e6f058 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Fri, 5 Jan 2024 09:42:00 -0600 Subject: [PATCH 08/13] Can round-trip state from lib --- cmd/airbyte-source/read.go | 17 ++++++--- cmd/airbyte-source/test_types.go | 3 +- cmd/internal/mock_types.go | 11 +----- cmd/internal/planetscale_connection.go | 3 +- cmd/internal/types.go | 51 +++++++++++++------------- 5 files changed, 43 insertions(+), 42 deletions(-) diff --git a/cmd/airbyte-source/read.go b/cmd/airbyte-source/read.go index 80f94d7..634de9f 100644 --- a/cmd/airbyte-source/read.go +++ b/cmd/airbyte-source/read.go @@ -119,8 +119,8 @@ func ReadCommand(ch *Helper) *cobra.Command { os.Exit(1) } - for _, shardState := range streamState.Shards { - tc, err := shardState.SerializedCursorToTableCursor(table) + for shardName, shardState := range streamState.Shards { + tc, err := shardState.SerializedCursorToTableCursor() if err != nil { ch.Logger.Error(fmt.Sprintf("invalid cursor for stream %v, failed with [%v]", streamStateKey, err)) os.Exit(1) @@ -138,8 +138,13 @@ func ReadCommand(ch *Helper) *cobra.Command { onUpdate := func(*lib.UpdatedRow) error { return nil } - onCursor := func(*psdbconnect.TableCursor) error { - //syncState.Streams[streamStateKey].Shards[shardName] = sc + + onCursor := func(tc *psdbconnect.TableCursor) error { + sc, err := lib.TableCursorToSerializedCursor(tc) + if err != nil { + return err + } + syncState.Streams[streamStateKey].Shards[shardName] = sc ch.Logger.Flush() ch.Logger.State(syncState) return nil @@ -153,8 +158,9 @@ func ReadCommand(ch *Helper) *cobra.Command { if sc != nil { // if we 
get any new state, we assign it here. // otherwise, the older state is round-tripped back to Airbyte. - //syncState.Streams[streamStateKey].Shards[shardName] = sc + syncState.Streams[streamStateKey].Shards[shardName] = sc } + ch.Logger.Flush() ch.Logger.State(syncState) } @@ -183,6 +189,7 @@ func readState(state string, psc internal.PlanetScaleSource, streams []internal. } for _, s := range streams { + keyspaceOrDatabase := s.Stream.Namespace if keyspaceOrDatabase == "" { keyspaceOrDatabase = psc.Database diff --git a/cmd/airbyte-source/test_types.go b/cmd/airbyte-source/test_types.go index de7a3c6..e617ba6 100644 --- a/cmd/airbyte-source/test_types.go +++ b/cmd/airbyte-source/test_types.go @@ -2,6 +2,7 @@ package airbyte_source import ( "context" + "github.com/planetscale/connectsdk/lib" "io" "github.com/planetscale/airbyte-source/cmd/internal" @@ -39,7 +40,7 @@ func (td testDatabase) HasTabletType(ctx context.Context, psc internal.PlanetSca return true, nil } -func (td testDatabase) Read(ctx context.Context, w io.Writer, ps internal.PlanetScaleSource, s internal.ConfiguredStream, tc *psdbconnect.TableCursor) (*internal.SerializedCursor, error) { +func (td testDatabase) Read(ctx context.Context, w io.Writer, ps internal.PlanetScaleSource, s internal.ConfiguredStream, tc *psdbconnect.TableCursor) (*lib.SerializedCursor, error) { // TODO implement me panic("implement me") } diff --git a/cmd/internal/mock_types.go b/cmd/internal/mock_types.go index 88809f8..932f5c1 100644 --- a/cmd/internal/mock_types.go +++ b/cmd/internal/mock_types.go @@ -77,10 +77,8 @@ func (c *clientConnectionMock) Sync(ctx context.Context, in *psdbconnect.SyncReq } type mysqlAccessMock struct { - PingContextFn func(ctx context.Context, source PlanetScaleSource) error - PingContextFnInvoked bool - GetVitessTabletsFn func(ctx context.Context, psc PlanetScaleSource) ([]VitessTablet, error) - GetVitessTabletsFnInvoked bool + PingContextFn func(ctx context.Context, source PlanetScaleSource) error + 
PingContextFnInvoked bool } func (tma *mysqlAccessMock) PingContext(ctx context.Context, source PlanetScaleSource) error { @@ -108,11 +106,6 @@ func (mysqlAccessMock) QueryContext(ctx context.Context, psc PlanetScaleSource, panic("implement me") } -func (tma *mysqlAccessMock) GetVitessTablets(ctx context.Context, psc PlanetScaleSource) ([]VitessTablet, error) { - tma.GetVitessTabletsFnInvoked = true - return tma.GetVitessTabletsFn(ctx, psc) -} - func (mysqlAccessMock) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) { //TODO implement me panic("implement me") diff --git a/cmd/internal/planetscale_connection.go b/cmd/internal/planetscale_connection.go index 40b2298..1d360cd 100644 --- a/cmd/internal/planetscale_connection.go +++ b/cmd/internal/planetscale_connection.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/go-sql-driver/mysql" psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "github.com/planetscale/connectsdk/lib" "os" "strings" ) @@ -50,7 +51,7 @@ func (psc PlanetScaleSource) DSN() string { // This state can be round-tripped safely with Airbyte. 
func (psc PlanetScaleSource) GetInitialState(keyspaceOrDatabase string, shards []string) (ShardStates, error) { shardCursors := ShardStates{ - Shards: map[string]*SerializedCursor{}, + Shards: map[string]*lib.SerializedCursor{}, } if len(psc.Shards) > 0 { diff --git a/cmd/internal/types.go b/cmd/internal/types.go index 9929299..7814e39 100644 --- a/cmd/internal/types.go +++ b/cmd/internal/types.go @@ -2,6 +2,7 @@ package internal import ( "encoding/base64" + "github.com/planetscale/connectsdk/lib" "regexp" "strconv" "strings" @@ -95,37 +96,35 @@ type SyncState struct { } type ShardStates struct { - Shards map[string]*SerializedCursor `json:"shards"` -} - -type SerializedCursor struct { - Cursor string `json:"cursor"` -} - -func (s SerializedCursor) SerializedCursorToTableCursor(table ConfiguredStream) (*psdbconnect.TableCursor, error) { - var ( - tc psdbconnect.TableCursor - ) - decoded, err := base64.StdEncoding.DecodeString(s.Cursor) - if err != nil { - return nil, errors.Wrap(err, "unable to decode table cursor") - } - - err = codec.DefaultCodec.Unmarshal(decoded, &tc) - if err != nil { - return nil, errors.Wrap(err, "unable to deserialize table cursor") - } - - return &tc, nil -} - -func TableCursorToSerializedCursor(cursor *psdbconnect.TableCursor) (*SerializedCursor, error) { + Shards map[string]*lib.SerializedCursor `json:"shards"` +} + +// +//func (s lib.SerializedCursor) SerializedCursorToTableCursor(table ConfiguredStream) (*psdbconnect.TableCursor, error) { +// var ( +// tc psdbconnect.TableCursor +// ) +// s. 
+// decoded, err := base64.StdEncoding.DecodeString(s.Cursor) +// if err != nil { +// return nil, errors.Wrap(err, "unable to decode table cursor") +// } +// +// err = codec.DefaultCodec.Unmarshal(decoded, &tc) +// if err != nil { +// return nil, errors.Wrap(err, "unable to deserialize table cursor") +// } +// +// return &tc, nil +//} + +func TableCursorToSerializedCursor(cursor *psdbconnect.TableCursor) (*lib.SerializedCursor, error) { d, err := codec.DefaultCodec.Marshal(cursor) if err != nil { return nil, errors.Wrap(err, "unable to marshal table cursor to save staate.") } - sc := &SerializedCursor{ + sc := &lib.SerializedCursor{ Cursor: base64.StdEncoding.EncodeToString(d), } return sc, nil From f0fc8c3e65e5145d456db3cde63532407a920de3 Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Fri, 5 Jan 2024 10:00:10 -0600 Subject: [PATCH 09/13] fix all broken tests --- cmd/airbyte-source/check_test.go | 23 ++-- cmd/airbyte-source/discover.go | 16 +-- cmd/airbyte-source/discover_test.go | 21 ++-- cmd/airbyte-source/helper.go | 4 + cmd/airbyte-source/test_types.go | 52 ++++++--- cmd/internal/mock_types.go | 113 -------------------- cmd/internal/planetscale_connection.go | 45 -------- cmd/internal/planetscale_connection_test.go | 30 +----- cmd/internal/schema_builder.go | 1 - cmd/internal/types_test.go | 2 +- 10 files changed, 73 insertions(+), 234 deletions(-) delete mode 100644 cmd/internal/mock_types.go diff --git a/cmd/airbyte-source/check_test.go b/cmd/airbyte-source/check_test.go index 5e4e4d1..3613d12 100644 --- a/cmd/airbyte-source/check_test.go +++ b/cmd/airbyte-source/check_test.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/json" "fmt" + "github.com/planetscale/connectsdk/lib" "os" "testing" @@ -27,7 +28,6 @@ func TestCheckInvalidCatalogJSON(t *testing.T) { content: []byte("i am not json"), } checkCommand := CheckCommand(&Helper{ - Database: internal.PlanetScaleEdgeDatabase{}, FileReader: tfr, Logger: internal.NewLogger(os.Stdout), }) @@ -50,16 +50,16 @@ func 
TestCheckCredentialsInvalid(t *testing.T) { content: []byte("{\"host\": \"something.us-east-3.psdb.cloud\",\"database\":\"database\",\"username\":\"username\",\"password\":\"password\"}"), } - td := testDatabase{ + td := testConnectClient{ connectResponse: canConnectResponse{ err: fmt.Errorf("[%v] is invalid", "username"), }, } checkCommand := CheckCommand(&Helper{ - Database: td, - FileReader: tfr, - Logger: internal.NewLogger(os.Stdout), + ConnectClient: td, + FileReader: tfr, + Logger: internal.NewLogger(os.Stdout), }) b := bytes.NewBufferString("") checkCommand.SetOut(b) @@ -80,16 +80,21 @@ func TestCheckExecuteSuccessful(t *testing.T) { content: []byte("{\"host\": \"something.us-east-3.psdb.cloud\",\"database\":\"database\",\"username\":\"username\",\"password\":\"password\"}"), } - td := testDatabase{ + td := testConnectClient{ connectResponse: canConnectResponse{ err: nil, }, } checkCommand := CheckCommand(&Helper{ - Database: td, - FileReader: tfr, - Logger: internal.NewLogger(os.Stdout), + ConnectClient: td, + FileReader: tfr, + Source: lib.PlanetScaleSource{ + Host: "something.us-east-3.psdb.cloud", + Database: "database", + Username: "username", + }, + Logger: internal.NewLogger(os.Stdout), }) b := bytes.NewBufferString("") checkCommand.SetOut(b) diff --git a/cmd/airbyte-source/discover.go b/cmd/airbyte-source/discover.go index 4d88484..fa485d2 100644 --- a/cmd/airbyte-source/discover.go +++ b/cmd/airbyte-source/discover.go @@ -6,7 +6,6 @@ import ( "os" "github.com/planetscale/airbyte-source/cmd/internal" - "github.com/planetscale/connectsdk/lib" "github.com/spf13/cobra" ) @@ -47,22 +46,13 @@ func DiscoverCommand(ch *Helper) *cobra.Command { return } - libpsc := lib.PlanetScaleSource{ - UseReplica: true, - Username: psc.Username, - Database: psc.Database, - Host: psc.Host, - Password: psc.Password, - TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, - } - mc, err := lib.NewMySQL(&libpsc) - if err != nil { - 
ch.Logger.Log(internal.LOGLEVEL_ERROR, fmt.Sprintf("Unable to discover database, failed with [%v]", err)) + if err := ch.EnsureDB(psc); err != nil { + fmt.Fprintln(cmd.OutOrStdout(), "Unable to connect to PlanetScale Database") return } sb := internal.NewSchemaBuilder() - if err := mc.BuildSchema(context.Background(), libpsc, sb); err != nil { + if err := ch.MysqlClient.BuildSchema(context.Background(), ch.Source, sb); err != nil { ch.Logger.Log(internal.LOGLEVEL_ERROR, fmt.Sprintf("Unable to discover database, failed with [%v]", err)) return } diff --git a/cmd/airbyte-source/discover_test.go b/cmd/airbyte-source/discover_test.go index 90253b6..a6b7371 100644 --- a/cmd/airbyte-source/discover_test.go +++ b/cmd/airbyte-source/discover_test.go @@ -15,7 +15,7 @@ func TestDiscoverInvalidSource(t *testing.T) { tfr := testFileReader{ content: []byte("{\"host\": \"something.us-east-3.psdb.cloud\",\"database\":\"database\",\"username\":\"username\",\"password\":\"password\"}"), } - td := testDatabase{ + td := testConnectClient{ connectResponse: canConnectResponse{ err: fmt.Errorf("[%v] is invalid", "username"), }, @@ -23,9 +23,9 @@ func TestDiscoverInvalidSource(t *testing.T) { b := bytes.NewBufferString("") discover := DiscoverCommand(&Helper{ - Database: td, - FileReader: tfr, - Logger: internal.NewLogger(b), + ConnectClient: td, + FileReader: tfr, + Logger: internal.NewLogger(b), }) discover.SetArgs([]string{"config source.json"}) @@ -46,19 +46,22 @@ func TestDiscoverFailed(t *testing.T) { tfr := testFileReader{ content: []byte("{\"host\": \"something.us-east-3.psdb.cloud\",\"database\":\"database\",\"username\":\"username\",\"password\":\"password\"}"), } - td := testDatabase{ + td := testConnectClient{ connectResponse: canConnectResponse{ err: nil, }, - discoverSchemaResponse: discoverSchemaResponse{ + } + tmc := testMysqlClient{ + buildSchemaResponse: buildSchemaResponse{ err: fmt.Errorf("unable to get catalog for %v", "keyspace"), }, } b := bytes.NewBufferString("") 
discover := DiscoverCommand(&Helper{ - Database: td, - FileReader: tfr, - Logger: internal.NewLogger(b), + ConnectClient: td, + MysqlClient: tmc, + FileReader: tfr, + Logger: internal.NewLogger(b), }) discover.SetArgs([]string{"config source.json"}) diff --git a/cmd/airbyte-source/helper.go b/cmd/airbyte-source/helper.go index 29fc469..12b436f 100644 --- a/cmd/airbyte-source/helper.go +++ b/cmd/airbyte-source/helper.go @@ -34,6 +34,10 @@ func DefaultHelper(w io.Writer) *Helper { } func (h *Helper) EnsureDB(psc internal.PlanetScaleSource) error { + if h.ConnectClient != nil { + return nil + } + h.Source = lib.PlanetScaleSource{ UseReplica: true, Username: psc.Username, diff --git a/cmd/airbyte-source/test_types.go b/cmd/airbyte-source/test_types.go index e617ba6..8eaff71 100644 --- a/cmd/airbyte-source/test_types.go +++ b/cmd/airbyte-source/test_types.go @@ -2,11 +2,8 @@ package airbyte_source import ( "context" - "github.com/planetscale/connectsdk/lib" - "io" - - "github.com/planetscale/airbyte-source/cmd/internal" psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "github.com/planetscale/connectsdk/lib" ) type testFileReader struct { @@ -22,33 +19,54 @@ type canConnectResponse struct { err error } -type discoverSchemaResponse struct { - catalog internal.Catalog - err error -} - -type testDatabase struct { - connectResponse canConnectResponse - discoverSchemaResponse discoverSchemaResponse +type testConnectClient struct { + connectResponse canConnectResponse } -func (td testDatabase) CanConnect(ctx context.Context, ps internal.PlanetScaleSource) error { +func (td testConnectClient) CanConnect(ctx context.Context, ps lib.PlanetScaleSource) error { return td.connectResponse.err } -func (td testDatabase) HasTabletType(ctx context.Context, psc internal.PlanetScaleSource, tt psdbconnect.TabletType) (bool, error) { +func (td testConnectClient) HasTabletType(ctx context.Context, psc lib.PlanetScaleSource, tt psdbconnect.TabletType) (bool, 
error) { return true, nil } -func (td testDatabase) Read(ctx context.Context, w io.Writer, ps internal.PlanetScaleSource, s internal.ConfiguredStream, tc *psdbconnect.TableCursor) (*lib.SerializedCursor, error) { +func (td testConnectClient) Read(context.Context, lib.DatabaseLogger, lib.PlanetScaleSource, string, []string, *psdbconnect.TableCursor, lib.OnResult, lib.OnCursor, lib.OnUpdate) (*lib.SerializedCursor, error) { // TODO implement me panic("implement me") } -func (td testDatabase) Close() error { +func (td testConnectClient) Close() error { return nil } -func (td testDatabase) ListShards(ctx context.Context, ps internal.PlanetScaleSource) ([]string, error) { +func (td testConnectClient) ListShards(ctx context.Context, ps lib.PlanetScaleSource) ([]string, error) { + panic("implement me") +} + +type buildSchemaResponse struct { + err error +} + +type testMysqlClient struct { + buildSchemaResponse buildSchemaResponse +} + +func (tmc testMysqlClient) BuildSchema(ctx context.Context, psc lib.PlanetScaleSource, schemaBuilder lib.SchemaBuilder) error { + return tmc.buildSchemaResponse.err +} + +func (tmc testMysqlClient) PingContext(ctx context.Context, source lib.PlanetScaleSource) error { + //TODO implement me + panic("implement me") +} + +func (tmc testMysqlClient) GetVitessShards(ctx context.Context, psc lib.PlanetScaleSource) ([]string, error) { + //TODO implement me + panic("implement me") +} + +func (tmc testMysqlClient) Close() error { + //TODO implement me panic("implement me") } diff --git a/cmd/internal/mock_types.go b/cmd/internal/mock_types.go deleted file mode 100644 index 932f5c1..0000000 --- a/cmd/internal/mock_types.go +++ /dev/null @@ -1,113 +0,0 @@ -package internal - -import ( - "context" - "database/sql" - psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "google.golang.org/grpc" - "io" -) - -type testAirbyteLogger struct { - logMessages map[string][]string - records map[string][]map[string]interface{} -} - 
-func (tal *testAirbyteLogger) Log(level, message string) { - if tal.logMessages == nil { - tal.logMessages = map[string][]string{} - } - tal.logMessages[level] = append(tal.logMessages[level], message) -} - -func (testAirbyteLogger) Catalog(catalog Catalog) { - //TODO implement me - panic("implement me") -} - -func (testAirbyteLogger) ConnectionStatus(status ConnectionStatus) { - //TODO implement me - panic("implement me") -} - -func (tal *testAirbyteLogger) Record(tableNamespace, tableName string, data map[string]interface{}) { - if tal.records == nil { - tal.records = map[string][]map[string]interface{}{} - } - key := tableNamespace + "." + tableName - tal.records[key] = append(tal.records[key], data) -} - -func (testAirbyteLogger) Flush() { -} - -func (testAirbyteLogger) State(syncState SyncState) { - //TODO implement me - panic("implement me") -} - -func (testAirbyteLogger) Error(error string) { - //TODO implement me - panic("implement me") -} - -type clientConnectionMock struct { - syncFn func(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) - syncFnInvoked bool - syncFnInvokedCount int -} - -type connectSyncClientMock struct { - lastResponseSent int - syncResponses []*psdbconnect.SyncResponse - grpc.ClientStream -} - -func (x *connectSyncClientMock) Recv() (*psdbconnect.SyncResponse, error) { - if x.lastResponseSent >= len(x.syncResponses) { - return nil, io.EOF - } - x.lastResponseSent += 1 - return x.syncResponses[x.lastResponseSent-1], nil -} -func (c *clientConnectionMock) Sync(ctx context.Context, in *psdbconnect.SyncRequest, opts ...grpc.CallOption) (psdbconnect.Connect_SyncClient, error) { - c.syncFnInvoked = true - c.syncFnInvokedCount += 1 - return c.syncFn(ctx, in, opts...) 
-} - -type mysqlAccessMock struct { - PingContextFn func(ctx context.Context, source PlanetScaleSource) error - PingContextFnInvoked bool -} - -func (tma *mysqlAccessMock) PingContext(ctx context.Context, source PlanetScaleSource) error { - tma.PingContextFnInvoked = true - return tma.PingContextFn(ctx, source) -} - -func (mysqlAccessMock) GetTableNames(ctx context.Context, source PlanetScaleSource) ([]string, error) { - //TODO implement me - panic("implement me") -} - -func (mysqlAccessMock) GetTableSchema(ctx context.Context, source PlanetScaleSource, s string) (map[string]PropertyType, error) { - //TODO implement me - panic("implement me") -} - -func (mysqlAccessMock) GetTablePrimaryKeys(ctx context.Context, source PlanetScaleSource, s string) ([]string, error) { - //TODO implement me - panic("implement me") -} - -func (mysqlAccessMock) QueryContext(ctx context.Context, psc PlanetScaleSource, query string, args ...interface{}) (*sql.Rows, error) { - //TODO implement me - panic("implement me") -} - -func (mysqlAccessMock) GetVitessShards(ctx context.Context, psc PlanetScaleSource) ([]string, error) { - //TODO implement me - panic("implement me") -} -func (mysqlAccessMock) Close() error { return nil } diff --git a/cmd/internal/planetscale_connection.go b/cmd/internal/planetscale_connection.go index 1d360cd..cc245b6 100644 --- a/cmd/internal/planetscale_connection.go +++ b/cmd/internal/planetscale_connection.go @@ -2,10 +2,8 @@ package internal import ( "fmt" - "github.com/go-sql-driver/mysql" psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" "github.com/planetscale/connectsdk/lib" - "os" "strings" ) @@ -24,29 +22,6 @@ type CustomSourceOptions struct { DoNotTreatTinyIntAsBoolean bool `json:"do_not_treat_tiny_int_as_boolean"` } -// DSN returns a DataSource that mysql libraries can use to connect to a PlanetScale database. 
-func (psc PlanetScaleSource) DSN() string { - config := mysql.NewConfig() - config.Net = "tcp" - config.Addr = psc.Host - config.User = psc.Username - config.DBName = psc.Database - config.Passwd = psc.Password - - tt := psdbconnect.TabletType_primary - if psc.UseReplica { - tt = psdbconnect.TabletType_replica - } - - if useSecureConnection() { - config.TLSConfig = "true" - config.DBName = fmt.Sprintf("%v@%v", psc.Database, TabletTypeToString(tt)) - } else { - config.TLSConfig = "skip-verify" - } - return config.FormatDSN() -} - // GetInitialState will return the initial/blank state for a given keyspace in all of its shards. // This state can be round-tripped safely with Airbyte. func (psc PlanetScaleSource) GetInitialState(keyspaceOrDatabase string, shards []string) (ShardStates, error) { @@ -83,23 +58,3 @@ func (psc PlanetScaleSource) GetInitialState(keyspaceOrDatabase string, shards [ return shardCursors, nil } - -func useSecureConnection() bool { - e2eTestRun, found := os.LookupEnv("PS_END_TO_END_TEST_RUN") - if found && (e2eTestRun == "yes" || - e2eTestRun == "y" || - e2eTestRun == "true" || - e2eTestRun == "1") { - return false - } - - return true -} - -func TabletTypeToString(t psdbconnect.TabletType) string { - if t == psdbconnect.TabletType_replica { - return "replica" - } - - return "primary" -} diff --git a/cmd/internal/planetscale_connection_test.go b/cmd/internal/planetscale_connection_test.go index 17d8204..dbd6c84 100644 --- a/cmd/internal/planetscale_connection_test.go +++ b/cmd/internal/planetscale_connection_test.go @@ -2,33 +2,11 @@ package internal import ( psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "github.com/planetscale/connectsdk/lib" "github.com/stretchr/testify/assert" "testing" ) -func TestCanGenerateSecureDSN(t *testing.T) { - psc := PlanetScaleSource{ - Host: "useast.psdb.connect", - Username: "usernameus-east-4", - Password: "pscale_password", - Database: "connect-test", - } - dsn := psc.DSN() - 
assert.Equal(t, "usernameus-east-4:pscale_password@tcp(useast.psdb.connect)/connect-test@primary?tls=true", dsn) -} - -func TestCanGenerateInsecureDSN(t *testing.T) { - psc := PlanetScaleSource{ - Host: "useast.psdb.connect", - Username: "usernameus-east-4", - Password: "pscale_password", - Database: "connect-test", - } - t.Setenv("PS_END_TO_END_TEST_RUN", "true") - dsn := psc.DSN() - assert.Equal(t, "usernameus-east-4:pscale_password@tcp(useast.psdb.connect)/connect-test?tls=skip-verify", dsn) -} - func TestCanGenerateInitialState_Sharded(t *testing.T) { psc := PlanetScaleSource{ Host: "useast.psdb.connect", @@ -45,7 +23,7 @@ func TestCanGenerateInitialState_Sharded(t *testing.T) { shardStates, err := psc.GetInitialState("connect-test", shards) assert.NoError(t, err) expectedShardStates := ShardStates{ - Shards: map[string]*SerializedCursor{}, + Shards: map[string]*lib.SerializedCursor{}, } for _, shard := range shards { @@ -82,7 +60,7 @@ func TestCanGenerateInitialState_CustomShards(t *testing.T) { assert.Equal(t, len(configuredShards), len(shardStates.Shards)) expectedShardStates := ShardStates{ - Shards: map[string]*SerializedCursor{}, + Shards: map[string]*lib.SerializedCursor{}, } for _, shard := range configuredShards { @@ -112,7 +90,7 @@ func TestCanGenerateInitialState_Unsharded(t *testing.T) { shardStates, err := psc.GetInitialState("connect-test", shards) assert.NoError(t, err) expectedShardStates := ShardStates{ - Shards: map[string]*SerializedCursor{}, + Shards: map[string]*lib.SerializedCursor{}, } for _, shard := range shards { diff --git a/cmd/internal/schema_builder.go b/cmd/internal/schema_builder.go index 4403dd1..f31bcda 100644 --- a/cmd/internal/schema_builder.go +++ b/cmd/internal/schema_builder.go @@ -13,7 +13,6 @@ const ( var gcTableNameRegexp = regexp.MustCompile(gCTableNameExpression) type SchemaBuilder struct { - catalog *Catalog streams map[string]map[string]*Stream } diff --git a/cmd/internal/types_test.go b/cmd/internal/types_test.go 
index a477027..46e21e8 100644 --- a/cmd/internal/types_test.go +++ b/cmd/internal/types_test.go @@ -70,7 +70,7 @@ func TestCanUnmarshalLastKnownState(t *testing.T) { LastKnownPk: lastKnownPK, }) require.NoError(t, err) - tc, err := sc.SerializedCursorToTableCursor(ConfiguredStream{}) + tc, err := sc.SerializedCursorToTableCursor() require.NoError(t, err) assert.Equal(t, "connect", tc.Keyspace) assert.Equal(t, "40-80", tc.Shard) From 8feffc9540464027a2ce7e6ef97d00e14e24298f Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Fri, 5 Jan 2024 10:02:22 -0600 Subject: [PATCH 10/13] use premade connect client --- cmd/airbyte-source/read.go | 16 +--------------- cmd/internal/types.go | 19 ------------------- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/cmd/airbyte-source/read.go b/cmd/airbyte-source/read.go index 634de9f..3e61e47 100644 --- a/cmd/airbyte-source/read.go +++ b/cmd/airbyte-source/read.go @@ -91,20 +91,6 @@ func ReadCommand(ch *Helper) *cobra.Command { os.Exit(1) } - libpsc := lib.PlanetScaleSource{ - UseReplica: false, - Username: psc.Username, - Database: psc.Database, - Host: psc.Host, - Password: psc.Password, - TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, - } - mc, err := lib.NewMySQL(&libpsc) - if err != nil { - ch.Logger.Error(fmt.Sprintf("Unable to read state : %v", err)) - os.Exit(1) - } - cc := lib.NewConnectClient(&mc) allColumns := []string{} for _, table := range catalog.Streams { @@ -150,7 +136,7 @@ func ReadCommand(ch *Helper) *cobra.Command { return nil } - sc, err := cc.Read(context.Background(), ch.Logger, libpsc, table.Stream.Name, allColumns, tc, onResult, onCursor, onUpdate) + sc, err := ch.ConnectClient.Read(context.Background(), ch.Logger, ch.Source, table.Stream.Name, allColumns, tc, onResult, onCursor, onUpdate) if err != nil { ch.Logger.Error(err.Error()) os.Exit(1) diff --git a/cmd/internal/types.go b/cmd/internal/types.go index 7814e39..9c5c6de 100644 --- a/cmd/internal/types.go +++ 
b/cmd/internal/types.go @@ -99,25 +99,6 @@ type ShardStates struct { Shards map[string]*lib.SerializedCursor `json:"shards"` } -// -//func (s lib.SerializedCursor) SerializedCursorToTableCursor(table ConfiguredStream) (*psdbconnect.TableCursor, error) { -// var ( -// tc psdbconnect.TableCursor -// ) -// s. -// decoded, err := base64.StdEncoding.DecodeString(s.Cursor) -// if err != nil { -// return nil, errors.Wrap(err, "unable to decode table cursor") -// } -// -// err = codec.DefaultCodec.Unmarshal(decoded, &tc) -// if err != nil { -// return nil, errors.Wrap(err, "unable to deserialize table cursor") -// } -// -// return &tc, nil -//} - func TableCursorToSerializedCursor(cursor *psdbconnect.TableCursor) (*lib.SerializedCursor, error) { d, err := codec.DefaultCodec.Marshal(cursor) if err != nil { From 7f2f1fd409ebbf5e1aa95843c87465ab6ddc7f2c Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Fri, 5 Jan 2024 10:52:58 -0600 Subject: [PATCH 11/13] more cleanup --- cmd/airbyte-source/read.go | 2 +- cmd/airbyte-source/test_types.go | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/cmd/airbyte-source/read.go b/cmd/airbyte-source/read.go index 3e61e47..297b428 100644 --- a/cmd/airbyte-source/read.go +++ b/cmd/airbyte-source/read.go @@ -131,8 +131,8 @@ func ReadCommand(ch *Helper) *cobra.Command { return err } syncState.Streams[streamStateKey].Shards[shardName] = sc - ch.Logger.Flush() ch.Logger.State(syncState) + ch.Logger.Flush() return nil } diff --git a/cmd/airbyte-source/test_types.go b/cmd/airbyte-source/test_types.go index 8eaff71..05a0e82 100644 --- a/cmd/airbyte-source/test_types.go +++ b/cmd/airbyte-source/test_types.go @@ -27,10 +27,6 @@ func (td testConnectClient) CanConnect(ctx context.Context, ps lib.PlanetScaleSo return td.connectResponse.err } -func (td testConnectClient) HasTabletType(ctx context.Context, psc lib.PlanetScaleSource, tt psdbconnect.TabletType) (bool, error) { - return true, nil -} - func (td testConnectClient) 
Read(context.Context, lib.DatabaseLogger, lib.PlanetScaleSource, string, []string, *psdbconnect.TableCursor, lib.OnResult, lib.OnCursor, lib.OnUpdate) (*lib.SerializedCursor, error) { // TODO implement me panic("implement me") From 62d83dc38355b643090de903b82b813fca4c6bbf Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Fri, 5 Jan 2024 11:54:32 -0600 Subject: [PATCH 12/13] repo renamed --- cmd/airbyte-source/check.go | 2 +- cmd/airbyte-source/check_test.go | 2 +- cmd/airbyte-source/helper.go | 2 +- cmd/airbyte-source/read.go | 2 +- cmd/airbyte-source/test_types.go | 2 +- cmd/internal/planetscale_connection.go | 2 +- cmd/internal/planetscale_connection_test.go | 2 +- cmd/internal/schema_builder.go | 2 +- cmd/internal/types.go | 2 +- go.mod | 7 ++----- go.sum | 2 ++ 11 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cmd/airbyte-source/check.go b/cmd/airbyte-source/check.go index 97542fb..094dfc7 100644 --- a/cmd/airbyte-source/check.go +++ b/cmd/airbyte-source/check.go @@ -4,7 +4,7 @@ import ( "context" "encoding/json" "fmt" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" "os" "github.com/planetscale/airbyte-source/cmd/internal" diff --git a/cmd/airbyte-source/check_test.go b/cmd/airbyte-source/check_test.go index 3613d12..6f7080b 100644 --- a/cmd/airbyte-source/check_test.go +++ b/cmd/airbyte-source/check_test.go @@ -4,7 +4,7 @@ import ( "bytes" "encoding/json" "fmt" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" "os" "testing" diff --git a/cmd/airbyte-source/helper.go b/cmd/airbyte-source/helper.go index 12b436f..d8e2146 100644 --- a/cmd/airbyte-source/helper.go +++ b/cmd/airbyte-source/helper.go @@ -2,7 +2,7 @@ package airbyte_source import ( "github.com/planetscale/airbyte-source/cmd/internal" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" "io" "os" ) diff --git a/cmd/airbyte-source/read.go b/cmd/airbyte-source/read.go index 
297b428..dfd6116 100644 --- a/cmd/airbyte-source/read.go +++ b/cmd/airbyte-source/read.go @@ -5,7 +5,7 @@ import ( "encoding/json" "fmt" psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" "os" "vitess.io/vitess/go/sqltypes" diff --git a/cmd/airbyte-source/test_types.go b/cmd/airbyte-source/test_types.go index 05a0e82..0fdc609 100644 --- a/cmd/airbyte-source/test_types.go +++ b/cmd/airbyte-source/test_types.go @@ -3,7 +3,7 @@ package airbyte_source import ( "context" psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" ) type testFileReader struct { diff --git a/cmd/internal/planetscale_connection.go b/cmd/internal/planetscale_connection.go index cc245b6..31f354d 100644 --- a/cmd/internal/planetscale_connection.go +++ b/cmd/internal/planetscale_connection.go @@ -3,7 +3,7 @@ package internal import ( "fmt" psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" "strings" ) diff --git a/cmd/internal/planetscale_connection_test.go b/cmd/internal/planetscale_connection_test.go index dbd6c84..8fc0796 100644 --- a/cmd/internal/planetscale_connection_test.go +++ b/cmd/internal/planetscale_connection_test.go @@ -2,7 +2,7 @@ package internal import ( psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" "github.com/stretchr/testify/assert" "testing" ) diff --git a/cmd/internal/schema_builder.go b/cmd/internal/schema_builder.go index f31bcda..9367c78 100644 --- a/cmd/internal/schema_builder.go +++ b/cmd/internal/schema_builder.go @@ -1,7 +1,7 @@ package internal import ( - "github.com/planetscale/connectsdk/lib" + 
"github.com/planetscale/connect-sdk/lib" "regexp" "strings" ) diff --git a/cmd/internal/types.go b/cmd/internal/types.go index 9c5c6de..257df5a 100644 --- a/cmd/internal/types.go +++ b/cmd/internal/types.go @@ -2,7 +2,7 @@ package internal import ( "encoding/base64" - "github.com/planetscale/connectsdk/lib" + "github.com/planetscale/connect-sdk/lib" "regexp" "strconv" "strings" diff --git a/go.mod b/go.mod index 4114cfc..1402005 100644 --- a/go.mod +++ b/go.mod @@ -3,16 +3,15 @@ module github.com/planetscale/airbyte-source go 1.21.3 require ( - github.com/go-sql-driver/mysql v1.7.1 - github.com/planetscale/connectsdk v0.0.0-00010101000000-000000000000 + github.com/go-sql-driver/mysql v1.7.1 // indirect github.com/spf13/cobra v1.7.0 github.com/stretchr/testify v1.8.4 vitess.io/vitess v0.17.3 -//github.com/planetscale/vtprotobuf v0.5.0 //indirect ) require ( github.com/pkg/errors v0.9.1 + github.com/planetscale/connect-sdk v0.1.1 github.com/planetscale/psdb v0.0.0-20220429000526-e2a0e798aaf3 github.com/twitchtv/twirp v8.1.2+incompatible google.golang.org/grpc v1.59.0 @@ -83,5 +82,3 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect inet.af/netaddr v0.0.0-20220811202034-502d2d690317 // indirect ) - -replace github.com/planetscale/connectsdk => /Users/phaniraj/ps/connectsdk diff --git a/go.sum b/go.sum index 255b330..2333132 100644 --- a/go.sum +++ b/go.sum @@ -317,6 +317,8 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= +github.com/planetscale/connect-sdk v0.1.1 h1:5CjYAfy+o5oCR1+dlzfPmxqS/ZjBXVpfxvFUf9xXcbM= +github.com/planetscale/connect-sdk v0.1.1/go.mod h1:yXabTbpaaDZfHS5CcBIOCYCgug5jD73nt0BKTuGkOyU= github.com/planetscale/pargzip v0.0.0-20201116224723-90c7fc03ea8a 
h1:y0OpQ4+5tKxeh9+H+2cVgASl9yMZYV9CILinKOiKafA= github.com/planetscale/pargzip v0.0.0-20201116224723-90c7fc03ea8a/go.mod h1:GJFUzQuXIoB2Kjn1ZfDhJr/42D5nWOqRcIQVgCxTuIE= github.com/planetscale/psdb v0.0.0-20220429000526-e2a0e798aaf3 h1:oEgD8tPIpxrTTEvVEDsY9pjkJOiqmpOE5kCGnTIGyHM= From 58ce09ca19d9dc29bb16e5f3779cc2cff897623d Mon Sep 17 00:00:00 2001 From: Phani Raj Date: Wed, 10 Jan 2024 11:00:41 -0600 Subject: [PATCH 13/13] upgrade to latest library --- cmd/airbyte-source/helper.go | 11 ++++--- cmd/airbyte-source/read.go | 33 +++++++-------------- cmd/airbyte-source/test_types.go | 2 +- cmd/internal/result_builder.go | 49 ++++++++++++++++++++++++++++++++ cmd/internal/types_test.go | 2 +- go.mod | 2 +- go.sum | 2 ++ 7 files changed, 69 insertions(+), 32 deletions(-) create mode 100644 cmd/internal/result_builder.go diff --git a/cmd/airbyte-source/helper.go b/cmd/airbyte-source/helper.go index d8e2146..5e34885 100644 --- a/cmd/airbyte-source/helper.go +++ b/cmd/airbyte-source/helper.go @@ -39,12 +39,11 @@ func (h *Helper) EnsureDB(psc internal.PlanetScaleSource) error { } h.Source = lib.PlanetScaleSource{ - UseReplica: true, - Username: psc.Username, - Database: psc.Database, - Host: psc.Host, - Password: psc.Password, - TreatTinyIntAsBoolean: !psc.Options.DoNotTreatTinyIntAsBoolean, + UseReplica: true, + Username: psc.Username, + Database: psc.Database, + Host: psc.Host, + Password: psc.Password, } var err error h.MysqlClient, err = lib.NewMySQL(&h.Source) diff --git a/cmd/airbyte-source/read.go b/cmd/airbyte-source/read.go index dfd6116..2c22248 100644 --- a/cmd/airbyte-source/read.go +++ b/cmd/airbyte-source/read.go @@ -4,13 +4,11 @@ import ( "context" "encoding/json" "fmt" + "github.com/planetscale/airbyte-source/cmd/internal" psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" "github.com/planetscale/connect-sdk/lib" - "os" - "vitess.io/vitess/go/sqltypes" - - "github.com/planetscale/airbyte-source/cmd/internal" 
"github.com/spf13/cobra" + "os" ) var ( @@ -92,7 +90,8 @@ func ReadCommand(ch *Helper) *cobra.Command { } allColumns := []string{} - + rb := internal.NewResultBuilder(ch.Logger) + irb := rb.(*internal.ResultBuilder) for _, table := range catalog.Streams { keyspaceOrDatabase := table.Stream.Namespace if keyspaceOrDatabase == "" { @@ -104,29 +103,17 @@ func ReadCommand(ch *Helper) *cobra.Command { ch.Logger.Error(fmt.Sprintf("Unable to read state for stream %v", streamStateKey)) os.Exit(1) } + irb.SetKeyspace(keyspaceOrDatabase) + irb.SetTable(table.Stream.Name) for shardName, shardState := range streamState.Shards { - tc, err := shardState.SerializedCursorToTableCursor() + tc, err := shardState.DeserializeTableCursor() if err != nil { ch.Logger.Error(fmt.Sprintf("invalid cursor for stream %v, failed with [%v]", streamStateKey, err)) os.Exit(1) } - - onResult := func(qr *sqltypes.Result, _ lib.Operation) error { - data := internal.QueryResultToRecords(qr) - - for _, record := range data { - ch.Logger.Record(keyspaceOrDatabase, table.Stream.Name, record) - } - return nil - } - - onUpdate := func(*lib.UpdatedRow) error { - return nil - } - - onCursor := func(tc *psdbconnect.TableCursor) error { - sc, err := lib.TableCursorToSerializedCursor(tc) + irb.HandleOnCursor = func(tc *psdbconnect.TableCursor) error { + sc, err := lib.SerializeTableCursor(tc) if err != nil { return err } @@ -136,7 +123,7 @@ func ReadCommand(ch *Helper) *cobra.Command { return nil } - sc, err := ch.ConnectClient.Read(context.Background(), ch.Logger, ch.Source, table.Stream.Name, allColumns, tc, onResult, onCursor, onUpdate) + sc, err := ch.ConnectClient.Read(context.Background(), ch.Logger, ch.Source, table.Stream.Name, allColumns, tc, rb) if err != nil { ch.Logger.Error(err.Error()) os.Exit(1) diff --git a/cmd/airbyte-source/test_types.go b/cmd/airbyte-source/test_types.go index 0fdc609..63811b2 100644 --- a/cmd/airbyte-source/test_types.go +++ b/cmd/airbyte-source/test_types.go @@ -27,7 +27,7 
@@ func (td testConnectClient) CanConnect(ctx context.Context, ps lib.PlanetScaleSo return td.connectResponse.err } -func (td testConnectClient) Read(context.Context, lib.DatabaseLogger, lib.PlanetScaleSource, string, []string, *psdbconnect.TableCursor, lib.OnResult, lib.OnCursor, lib.OnUpdate) (*lib.SerializedCursor, error) { +func (td testConnectClient) Read(context.Context, lib.DatabaseLogger, lib.PlanetScaleSource, string, []string, *psdbconnect.TableCursor, lib.ResultBuilder) (*lib.SerializedCursor, error) { // TODO implement me panic("implement me") } diff --git a/cmd/internal/result_builder.go b/cmd/internal/result_builder.go new file mode 100644 index 0000000..f9e5302 --- /dev/null +++ b/cmd/internal/result_builder.go @@ -0,0 +1,49 @@ +package internal + +import ( + "github.com/pkg/errors" + psdbconnect "github.com/planetscale/airbyte-source/proto/psdbconnect/v1alpha1" + "github.com/planetscale/connect-sdk/lib" + "vitess.io/vitess/go/sqltypes" +) + +type ResultBuilder struct { + keyspace string + table string + logger AirbyteLogger + HandleOnCursor func(tc *psdbconnect.TableCursor) error +} + +func NewResultBuilder(logger AirbyteLogger) lib.ResultBuilder { + return &ResultBuilder{ + logger: logger, + } +} + +func (rb *ResultBuilder) OnResult(result *sqltypes.Result, operation lib.Operation) error { + data := QueryResultToRecords(result) + for _, record := range data { + rb.logger.Record(rb.keyspace, rb.table, record) + } + return nil +} + +func (ResultBuilder) OnUpdate(row *lib.UpdatedRow) error { + return nil +} + +func (rb *ResultBuilder) OnCursor(cursor *psdbconnect.TableCursor) error { + if rb.HandleOnCursor == nil { + return errors.New("Unhandled onCursor event") + } + + return rb.HandleOnCursor(cursor) +} + +func (rb *ResultBuilder) SetKeyspace(keyspace string) { + rb.keyspace = keyspace +} + +func (rb *ResultBuilder) SetTable(table string) { + rb.table = table +} diff --git a/cmd/internal/types_test.go b/cmd/internal/types_test.go index 
46e21e8..4b6b962 100644 --- a/cmd/internal/types_test.go +++ b/cmd/internal/types_test.go @@ -70,7 +70,7 @@ func TestCanUnmarshalLastKnownState(t *testing.T) { LastKnownPk: lastKnownPK, }) require.NoError(t, err) - tc, err := sc.SerializedCursorToTableCursor() + tc, err := sc.DeserializeTableCursor() require.NoError(t, err) assert.Equal(t, "connect", tc.Keyspace) assert.Equal(t, "40-80", tc.Shard) diff --git a/go.mod b/go.mod index 1402005..28332ca 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( require ( github.com/pkg/errors v0.9.1 - github.com/planetscale/connect-sdk v0.1.1 + github.com/planetscale/connect-sdk v0.1.2 github.com/planetscale/psdb v0.0.0-20220429000526-e2a0e798aaf3 github.com/twitchtv/twirp v8.1.2+incompatible google.golang.org/grpc v1.59.0 diff --git a/go.sum b/go.sum index 2333132..85d00af 100644 --- a/go.sum +++ b/go.sum @@ -319,6 +319,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= github.com/planetscale/connect-sdk v0.1.1 h1:5CjYAfy+o5oCR1+dlzfPmxqS/ZjBXVpfxvFUf9xXcbM= github.com/planetscale/connect-sdk v0.1.1/go.mod h1:yXabTbpaaDZfHS5CcBIOCYCgug5jD73nt0BKTuGkOyU= +github.com/planetscale/connect-sdk v0.1.2 h1:Md+VrAI+WKaRiCCorHYCHfQr2e/mtZUCvePebsTXb7U= +github.com/planetscale/connect-sdk v0.1.2/go.mod h1:s6B7tBzN+7ozfHFkcvowa9GzJB2vD40fbS2P2DWLySc= github.com/planetscale/pargzip v0.0.0-20201116224723-90c7fc03ea8a h1:y0OpQ4+5tKxeh9+H+2cVgASl9yMZYV9CILinKOiKafA= github.com/planetscale/pargzip v0.0.0-20201116224723-90c7fc03ea8a/go.mod h1:GJFUzQuXIoB2Kjn1ZfDhJr/42D5nWOqRcIQVgCxTuIE= github.com/planetscale/psdb v0.0.0-20220429000526-e2a0e798aaf3 h1:oEgD8tPIpxrTTEvVEDsY9pjkJOiqmpOE5kCGnTIGyHM=