From 88f0462f39e669f0efde56fe2ff0eba7d429ef10 Mon Sep 17 00:00:00 2001 From: Benita Volkmann Date: Mon, 20 Oct 2025 16:36:14 +0200 Subject: [PATCH] WIP - sync streams beta docs - updated strcutre --- docs.json | 32 +- usage/sync-rules.mdx | 27 +- usage/sync-streams.mdx | 434 ------------------ usage/sync-streams/advanced/compatibility.mdx | 114 +++++ .../advanced/multiple-client-versions.mdx | 257 +++++++++++ .../advanced/partitioned-tables.mdx | 26 ++ .../advanced/schemas-and-connections.mdx | 37 ++ .../advanced/shared-databases.mdx | 43 ++ usage/sync-streams/glossary.mdx | 158 +++++++ usage/sync-streams/quick-start.mdx | 228 +++++++++ .../reference/case-sensitivity.mdx | 43 ++ .../reference/client-primary-key.mdx | 64 +++ usage/sync-streams/reference/joins.mdx | 114 +++++ .../reference/operators-and-functions.mdx | 58 +++ usage/sync-streams/reference/query-syntax.mdx | 291 ++++++++++++ usage/sync-streams/reference/types.mdx | 107 +++++ usage/sync-streams/under-the-hood.mdx | 10 + 17 files changed, 1594 insertions(+), 449 deletions(-) delete mode 100644 usage/sync-streams.mdx create mode 100644 usage/sync-streams/advanced/compatibility.mdx create mode 100644 usage/sync-streams/advanced/multiple-client-versions.mdx create mode 100644 usage/sync-streams/advanced/partitioned-tables.mdx create mode 100644 usage/sync-streams/advanced/schemas-and-connections.mdx create mode 100644 usage/sync-streams/advanced/shared-databases.mdx create mode 100644 usage/sync-streams/glossary.mdx create mode 100644 usage/sync-streams/quick-start.mdx create mode 100644 usage/sync-streams/reference/case-sensitivity.mdx create mode 100644 usage/sync-streams/reference/client-primary-key.mdx create mode 100644 usage/sync-streams/reference/joins.mdx create mode 100644 usage/sync-streams/reference/operators-and-functions.mdx create mode 100644 usage/sync-streams/reference/query-syntax.mdx create mode 100644 usage/sync-streams/reference/types.mdx create mode 100644 
usage/sync-streams/under-the-hood.mdx diff --git a/docs.json b/docs.json index 73f2943d..096a49c0 100644 --- a/docs.json +++ b/docs.json @@ -116,7 +116,36 @@ "group": "Usage", "pages": [ { - "group": "Sync Rules", + "group": "Sync Streams", + "pages": [ + "usage/sync-streams/quick-start", + { + "group": "Reference", + "pages": [ + "usage/sync-streams/reference/query-syntax", + "usage/sync-streams/reference/operators-and-functions", + "usage/sync-streams/reference/types", + "usage/sync-streams/reference/case-sensitivity", + "usage/sync-streams/reference/client-primary-key", + "usage/sync-streams/reference/joins" + ] + }, + { + "group": "Advanced", + "pages": [ + "usage/sync-streams/advanced/compatibility", + "usage/sync-streams/advanced/multiple-client-versions", + "usage/sync-streams/advanced/schemas-and-connections", + "usage/sync-streams/advanced/partitioned-tables", + "usage/sync-streams/advanced/shared-databases" + ] + }, + "usage/sync-streams/under-the-hood", + "usage/sync-streams/glossary" + ] + }, + { + "group": "Sync Rules (Legacy)", "pages": [ "usage/sync-rules", "usage/sync-rules/example-global-data", @@ -134,7 +163,6 @@ { "group": "Advanced Topics", "pages": [ - "usage/sync-streams", "usage/sync-rules/advanced-topics", "usage/sync-rules/advanced-topics/multiple-client-versions", "usage/sync-rules/advanced-topics/client-parameters", diff --git a/usage/sync-rules.mdx b/usage/sync-rules.mdx index 277ef7e5..e14eed8a 100644 --- a/usage/sync-rules.mdx +++ b/usage/sync-rules.mdx @@ -3,26 +3,27 @@ title: "Sync Rules" sidebarTitle: Overview --- -PowerSync Sync Rules allow developers to control which data gets synchronized to which devices (i.e. they enable _dynamic partial replication_). + +**Sync Rules are now legacy - Sync Streams (Beta) are recommended** -## Introduction +Sync Streams are now in Beta and will replace Sync Rules. 
They provide: +- Simpler syntax with unified queries and subqueries +- On-demand syncing with subscription-based parameters +- Better support for web apps and multi-tab scenarios +- TTL-based caching behavior -We recommend starting with our [Sync Rules from First Principles](https://www.powersync.com/blog/sync-rules-from-first-principles-partial-replication-to-sqlite) blog post, which explains on a high-level what Sync Rules are, why they exist and how to implement them. +**New projects should use [Sync Streams](/usage/sync-streams/quick-start).** -The remainder of these docs dive further into the details. +Existing projects can continue using Sync Rules - they will be supported for the foreseeable future. We recommend migrating to Sync Streams when ready. + - -**Sync Streams available in Early Alpha** +PowerSync Sync Rules allow developers to control which data gets synchronized to which devices (i.e. they enable _dynamic partial replication_). -[Sync Streams](/usage/sync-streams) are now available in early alpha! Sync Streams will eventually replace Sync Rules and are designed to allow for more dynamic syncing, while not compromising on existing offline-first capabilties. +## Introduction -Key improvements in Sync Streams over Sync Rules include: -- **On-demand syncing**: You define Sync Streams on the PowerSync Service, and a client can then subscribe to them one or more times with different parameters. -- **Temporary caching-like behavior**: Each subscription includes a configurable ttl that keeps data active after your app unsubscribes, acting as a warm cache for recently accessed data. -- **Simpler developer experience**: Simplified syntax and mental model, and capabilities such as your UI components automatically managing subscriptions (for example, React hooks). 
+We recommend starting with our [Sync Rules from First Principles](https://www.powersync.com/blog/sync-rules-from-first-principles-partial-replication-to-sqlite) blog post, which explains on a high-level what Sync Rules are, why they exist and how to implement them. -We encourage you to explore Sync Streams for new projects, and migrating to Sync Streams once they are in Beta. - +The remainder of these docs dive further into the details. ## Defining Sync Rules diff --git a/usage/sync-streams.mdx b/usage/sync-streams.mdx deleted file mode 100644 index 4ad1c7b1..00000000 --- a/usage/sync-streams.mdx +++ /dev/null @@ -1,434 +0,0 @@ ---- -title: "Sync Streams (Early Alpha)" -description: Sync Streams will replace Sync Rules and are designed to allow for more dynamic syncing, while not compromising on existing offline-first capabilties. ---- - -## Motivation - -PowerSync's original [Sync Rules](/usage/sync-rules) system was optimized for offline-first use cases where you want to “sync everything upfront” when the client connects, so that data is available locally if a user goes offline at any point. - -However, many developers are building apps where users are mostly online, and you don't want to make users wait to sync a lot of data upfront. In these cases, it's more suited to sync data on-demand. This is especially true for web apps: users are mostly online and you often want to sync only the data needed for the current page. Users also frequently have multiple tabs open, each needing different subsets of data. - -Sync engines like PowerSync are still great for these online web app use cases, because they provide you with real-time updates, simplified state management, and ease of working with data locally. 
- -[Client Parameters](/usage/sync-rules/advanced-topics/client-parameters) in the current Sync Rules system support on-demand syncing across different browser tabs to some extent: For example, using a `project_ids` array as a Client Parameter to sync only specific projects. However, manually managing these arrays across different browser tabs becomes quite painful. - -We are introducing **Sync Streams** to provide the best of both worlds: support for dynamic on-demand syncing, as well as "syncing everything upfront". - -Key improvements in Sync Streams over Sync Rules include: - -1. **On-demand syncing**: You define Sync Streams on the PowerSync Service, and a client can then subscribe to them one or more times with different parameters. -2. **Temporary caching-like behavior**: Each subscription includes a configurable `ttl` that keeps data active after your app unsubscribes, acting as a warm cache for recently accessed data. -3. **Simpler developer experience**: Simplified syntax and mental model, and capabilities such as your UI components automatically managing subscriptions (for example, React hooks). - -If you want “sync everything upfront” behavior (like the current Sync Rules system), that’s easy too: you can configure any of your Sync Streams to be auto-subscribed by the client on connecting. - - - -**Early Alpha Release** - -Sync Streams will ultimately replace the current Sync Rules system. They are currently in an early alpha release, which of course means they're not yet suitable for production use, and the APIs and DX likely still need refinement. - -They are open for anyone to test: we are actively seeking your feedback on their performance for your use cases, the developer experience, missing capabilities, and potential optimizations. Please share your feedback with us in Discord 🫡 - -Sync Streams will be supported alongside Sync Rules for the foreseeable future, although we recommend migrating to Sync Streams once in Beta. 
- - -## Requirements for Using Sync Streams - -* v1.15.0 of the PowerSync Service (Cloud instances are already on this version) -* Minimum SDK versions: - * JS: - * Web: v1.27.0 - * React Native: v1.25.0 - * React hooks: v1.8.0 - * Dart: v1.16.0 - * Kotlin: v1.7.0 - * Swift: Coming soon. -* Use of the [Rust-based sync client](https://releases.powersync.com/announcements/improved-sync-performance-in-our-client-sdks) - - - - In JavaScript SDKs, pass the `clientImplementation` option when connecting: - - ```js - await db.connect(new MyConnector(), { - clientImplementation: SyncClientImplementation.RUST - }); - ``` - - You can migrate back to the JavaScript client later by removing the option. - - - Pass the `syncImplementation` option when connecting: - - ```dart - database.connect( - connector: YourConnector(), - options: const SyncOptions( - syncImplementation: SyncClientImplementation.rust, - ), - ); - ``` - - You can migrate back to the Dart client later by removing the option. - - - Pass the `newClientImplementation` option when connecting: - - ```kotlin - //@file:OptIn(ExperimentalPowerSyncAPI::class) - database.connect(MyConnector(), options = SyncOptions( - newClientImplementation = true, - )) - ``` - - You can migrate back to the Kotlin client later by removing the option. - - - Pass the `newClientImplementation` option when connecting: - - ```swift - @_spi(PowerSyncExperimental) import PowerSync - - try await db.connect(connector: connector, options: ConnectOptions( - newClientImplementation: true, - )) - ``` - - You can migrate back to the Swift client later by removing the option. - - - -* Sync Stream definitions. They are currently defined in the same YAML file as Sync Rules: `sync_rules.yaml` (PowerSync Cloud) or `config.yaml` (Open Edition/self-hosted). 
To enable Sync Streams, add the following configuration: - - ```yaml sync_rules.yaml - config: - # see https://docs.powersync.com/usage/sync-rules/compatibility - # this edition also deploys several backwards-incompatible fixes - # see the docs for details - edition: 2 - - streams: - ... # see 'Stream Definition Syntax' section below - ``` - -## Stream Definition Syntax - -You specify **stream definitions** similar to bucket definitions in Sync Rules. Clients then subscribe to the defined streams one or more times, with different parameters. - -Syntax: -```yaml sync_rules.yaml -streams: - : - query: string # similar to Data Queries in Sync Rules, but also support limited subqueries. - auto_subscribe: boolean # true to subscribe to this stream by default (similar to how Sync Rules work), false (default) if clients should explicitly subscribe. - priority: number # sync priority, same as in Sync Rules: https://docs.powersync.com/usage/use-case-examples/prioritized-sync - accept_potentially_dangerous_queries: boolean # silence warnings on dangerous queries, same as in Sync Rules. -``` - -Basic example: -```yaml sync_rules.yaml -config: - edition: 2 -streams: - issue: # Define a stream to a specific issue - query: select * from issues where id = subscription.parameters() ->> 'id' - issue_comments: # Define a stream to a specific issue's comments - query: select * from comments where issue_id = subscription.parameters() ->> 'id' - -``` - - -### Just Queries with Subqueries - -Whereas Sync Rules had separate [Parameter Queries](/usage/sync-rules/parameter-queries) and [Data Queries](/usage/sync-rules/data-queries), Sync Streams only have a `query`. Instead of Parameter Queries, Sync Streams can use parameters directly in the query, and support a limited form of subqueries. 
For example: - -```yaml sync_rules.yaml -# use parameters directly in the query (see below for details on accessing parameters) -select * from issues where id = subscription.parameters() ->> 'id' and owner_id = auth.user_id() - -# "in (subquery)" replaces parameter queries: -select * from comments where issue_id in (select id from issues where owner_id = auth.user_id()) -``` - -Under the hood, Sync Streams use the same bucket system as Sync Rules, so you get the same functionality as before with Parameter Queries, however, the Sync Streams syntax is closer to plain SQL. - - -### Accessing Parameters - -We have streamlined how different kinds of parameters are accessed in Sync Streams [compared](/usage/sync-rules/parameter-queries) to Sync Rules. - -**Subscription Parameters**: Passed from the client when it subscribes to a Sync Stream. See [Client-Side Syntax](#client-side-syntax) below. Clients can subscribe to the same stream multiple times with -different parameters: - -```yaml -subscription.parameters() # all parameters for the subscription, as JSON -subscription.parameter('key') # shorthand for getting a single specific parameter -``` - -**Auth Parameters**: Claims from the JWT: - -```yaml -auth.parameters() # JWT token payload, as JSON -auth.parameter('key') # short-hand for getting a single specific token payload parameter -auth.user_id() # same as auth.parameter('sub') -``` - - -**Connection Parameters**: Specified "globally" on the connection level. 
These are the equivalent of [Client Parameters](/usage/sync-rules/advanced-topics/client-parameters) in Sync Rules: - -```yaml -connection.parameters() # all parameters for the connection, as JSON -connection.parameter('key') # shorthand for getting a single specific parameter -``` - -### Usage Examples: Sync Rules vs Sync Streams - - - -### Global data -**Sync Rules:** -```yaml sync_rules.yaml - bucket_definitions: - global: - data: - # Sync all todos - - SELECT * FROM todos - # Sync all lists except archived ones - - SELECT * FROM lists WHERE archived = false -``` -**Sync Streams:** "Global" data — the data you want all of your users to have by default — is also defined as streams. Specify `auto_subscribe: true` so your users subscribe to them by default. -```yaml sync_rules.yaml - streams: - all_todos: - query: SELECT * FROM todos - auto_subscribe: true - unarchived_lists: - query: SELECT * FROM lists WHERE archived = false - auto_subscribe: true - -``` - -### A user's owned lists, with a priority -**Sync Rules:** -```yaml sync_rules.yaml - bucket_definitions: - user_lists: - priority: 1 # See https://docs.powersync.com/usage/use-case-examples/prioritized-sync - parameters: SELECT request.user_id() as user_id - data: - - SELECT * FROM lists WHERE owner_id = bucket.user_id -``` - -**Sync Streams:** -```yaml sync_rules.yaml - streams: - user_lists: - priority: 1 # See https://docs.powersync.com/usage/use-case-examples/prioritized-sync - query: SELECT * FROM lists WHERE owner_id = auth.user_id() -``` - -### Grouping by `list_id` -**Sync Rules:** -```yaml sync_rules.yaml - bucket_definitions: - owned_lists: - parameters: | - SELECT id as list_id FROM lists WHERE - owner_id = request.user_id() - data: - - SELECT * FROM lists WHERE lists.id = bucket.list_id - - SELECT * FROM todos WHERE todos.list_id = bucket.list_id -``` -**Sync Streams:** -```yaml sync_rules.yaml - streams: - owned_lists: - query: SELECT * FROM lists WHERE owner_id = auth.user_id() - list_todos: - 
query: SELECT * FROM todos WHERE list_id = subscription.parameter('list_id') AND list_id IN (SELECT id FROM lists WHERE owner_id = auth.user_id()) - -``` - -### Parameters usage -**Sync Rules:** -```yaml sync_rules.yaml - bucket_definitions: - posts: - parameters: SELECT (request.parameters() ->> 'current_page') as page_number - data: - - SELECT * FROM posts WHERE page_number = bucket.page_number -``` -**Sync Streams:** -```yaml sync_rules.yaml - streams: - posts: - query: SELECT * FROM posts WHERE page_number = subscription.parameter('page_number') -``` -Note that the behavior here is different to Sync Rules because `subscription.parameter('page_number')` is local to the subscription, so the Sync Stream can be subscribed to multiple times with different page numbers, whereas Sync Rules only allow a single global Client Parameter value at a time. Connection Parameters (`connection.parameter()`) are available in Sync Streams as the equivalent of the global Client Parameters in Sync Rules, but Subscription Parameters are recommended because they are much more flexible. 
- -### Specific columns/fields, renames and transformations - -Selecting, renaming or transforming specific columns/fields is identical between Sync Rules and Sync Streams: - -```yaml sync_rules.yaml - streams: - todos: - # Select specific columns - query: SELECT id, name, owner_id FROM todos - # Rename columns - query: SELECT id, name, created_timestamp AS created_at FROM todos - # Cast number to text - query: SELECT id, item_number :: text AS item_number FROM todos - # Alternative syntax for the same cast - query: id, CAST(item_number as TEXT) AS item_number FROM todos - # Convert binary data (bytea) to base64 - query: id, base64(thumbnail) AS thumbnail_base64 FROM todos - # Extract field from JSON or JSONB column - query: id, metadata_json ->> 'description' AS description FROM todos - # Convert time to epoch number - query: id, unixepoch(created_at) AS created_at FROM todos -``` - - - -## Client-Side Syntax - -In general, each SDK lets you: - -* Use `db.syncStream(name, [subscription-params])` to get a `SyncStream` instance. -* Call `subscribe()` on a `SyncStream` to get a `SyncStreamSubscription`. This gives you access to `waitForFirstSync()` and `unsubscribe()`. -* Inspect `SyncStatus` for a list of `SyncSubscriptionDefinition`s describing all Sync Streams your app is subscribed to (either due to an explicit subscription or because the Sync Stream has `auto_subscribe: true`). It also reports per-stream download progress. -* Each Sync Stream has a `ttl` (time-to-live). After you call `unsubscribe()`, or when the page/app closes, the stream keeps syncing for the `ttl` duration, enabling caching-like behavior. Each SDK lets you specify the `ttl`, or ignore the `ttl` and delete the data as soon as possible. If not specified, a default TTL of 24 hours applies. 
- -Select your language for specific examples: - - - ```js - const sub = await powerSync.syncStream('issues', {id: 'issue-id'}).subscribe(ttl: 3600); - - // Resolve current status for subscription - const status = powerSync.currentStatus.forStream(sub); - const progress = status?.progress; - - // Wait for this subscription to have synced - await sub.waitForFirstSync(); - - // When the component needing the subscription is no longer active... - sub.unsubscribe(); - ``` - - If you're using React, you can also use hooks to automatically subscribe components to Sync Streams: - - ```js - const stream = useSyncStream({ name: 'todo_list', parameters: { list: 'foo' } }); - // Can then check for download progress or subscription information - stream?.progress; - stream?.subscription.hasSynced; - ``` - - This hook is useful when you want to explicitly ensure a stream is active (for example a root component) or when you need progress/hasSynced state; this makes data available for all child components without each query declaring the stream. - - Additionally, the `useQuery` hook for React can wait for Sync Streams to be complete before running - queries. Pass `streams` only when the component knows which specific stream subscription(s) it depends on and it should wait before querying. - - ```js - const results = useQuery( - 'SELECT ...', - queryParameters, - // This will wait for the stream to sync before running the query - { streams: [{ name: 'todo_list', parameters: { list: 'foo' }, waitForStream: true }] } - ); - ``` - - - - ```dart - final sub = await db - .syncStream('issues', {'id': 'issue-id'}) - .subscribe(ttl: const Duration(hours: 1)); - - // Resolve current status for subscription - final status = db.currentStatus.forStream(sub); - final progress = status?.progress; - - // Wait for this subscription to have synced - await sub.waitForFirstSync(); - - // When the component needing the subscription is no longer active... 
- sub.unsubscribe(); - ``` - - - - ```Kotlin - val sub = database.syncStream("issues", mapOf("id" to JsonParam.String("issue-id"))).subscribe(ttl = 1.0.hours); - - // Resolve current status for subscription - val status = database.currentStatus.forStream(sub) - val progress = status?.progress - - // Wait for this subscription to have synced - sub.waitForFirstSync() - - // When the component needing the subscription is no longer active... - sub.unsubscribe() - ``` - - If you're using Compose, you can use the `composeSyncStream` extension to subscribe to a stream while - a composition is active: - - ```Kotlin - @Composable - fun TodoListPage(db: PowerSyncDatabase, id: String) { - val syncStream = db.composeSyncStream(name = "list", parameters = mapOf("list_id" to JsonParam.String(id))) - // Define component based on stream state - } - ``` - - - - Coming soon - - - -## Examples - - - - Try the [`react-supabase-todolist-sync-streams`](https://github.com/powersync-ja/powersync-js/tree/main/demos/react-supabase-todolist-sync-streams) demo app by following the instructions in the README. - - In this demo: - - - The app syncs `lists` by default (demonstrating equivalent behavior to Sync Rules, i.e. optimized for offline-first). - - The app syncs `todos` on demand when a user opens a list. - - When the user navigates back to the same list, they won't see a loading state — demonstrating caching behavior. - - - - Try the [`supabase-todolist`](https://github.com/powersync-ja/powersync.dart/tree/main/demos/supabase-todolist) demo app, which we updated to use Sync Streams (Sync Rules are still supported). - - Deploy the following Sync Streams: - - ```yaml sync_rules.yaml - config: - edition: 2 - streams: - lists: - query: SELECT * FROM lists - auto_subscribe: true - todos: - query: SELECT * FROM todos WHERE list_id = subscription.parameter('list') - ``` - - In this demo: - - - The app syncs `lists` by default (demonstrating equivalent behavior to Sync Rules, i.e. 
optimized for offline-first). - - The app syncs `todos` on demand when a user opens a list. - - When the user navigates back to the same list, they won't see a loading state — demonstrating caching behavior. - - - In progress, follow along: https://github.com/powersync-ja/powersync-kotlin/pull/270 - - \ No newline at end of file diff --git a/usage/sync-streams/advanced/compatibility.mdx b/usage/sync-streams/advanced/compatibility.mdx new file mode 100644 index 00000000..fc339c7f --- /dev/null +++ b/usage/sync-streams/advanced/compatibility.mdx @@ -0,0 +1,114 @@ +--- +title: "Compatibility" +description: "Configure sync behavior: enable latest backwards-incompatible fixes (recommended for new projects) or keep legacy behavior." +--- + +To ensure consistency, it is important that the PowerSync Service does not interpret the same source row in different ways after updating to a new version. +At the same time, we want to fix bugs or other inaccuracies that have accumulated during the development of the Service. + +## Overview + +To make this trade‑off explicit, you choose whether to keep the existing behavior or turn on newer fixes that slightly change how data is processed. + +Use the `config` block in your Sync Streams YAML file to choose the behavior. There are two ways to turn fixes on: + +1. Set an `edition` to enable the full set of fixes for that edition. This is the recommended approach for new projects. +2. Toggle individual options for more fine‑grained control. + +For older projects, the previous behavior remains the default. **New Sync Streams projects should enable all current fixes.** + +## Configuration + +**For Sync Streams projects, it is recommended to enable all current fixes by setting `edition: 2`:** + +```yaml sync_streams.yaml +config: + edition: 2 # Required for Sync Streams - enables all current fixes + +streams: + # ... 
your stream definitions +``` + +Or, specify options individually: + +```yaml sync_streams.yaml +config: + timestamps_iso8601: true + versioned_bucket_ids: true + fixed_json_extract: true + custom_postgres_types: true + +streams: + # ... your stream definitions +``` + +## Supported fixes + +This table lists all fixes currently supported: + +| Name | Explanation | Added in Service version | Fixed in edition | +|----------------------------|------------------------------------|--------------|------------------| +| `timestamps_iso8601` | [Link](#timestamps-iso8601) | 1.15.0 | 2 | +| `versioned_bucket_ids` | [Link](#versioned-bucket-ids) | 1.15.0 | 2 | +| `fixed_json_extract` | [Link](#fixed-json-extract) | 1.15.0 | 2 | +| `custom_postgres_types` | [Link](#custom-postgres-types). | 1.15.3 | 2 | + +### `timestamps_iso8601` + +PowerSync is supposed to encode timestamps according to the ISO-8601 standard. +Without this fix, the service encoded timestamps from MongoDB and Postgres source databases incorrectly. +To ensure time values from Postgres compare lexicographically, they're also padded to six digits of accuracy when encoded. +Since MongoDB only stores values with an accuracy of milliseconds, only three digits of accuracy are used. + +For instance, the value `2025-09-22T14:29:30` would be encoded as follows: + +- For Postgres: `2025-09-22 14:29:30` without the fix, `2025-09-22T14:29:30.000000` with the fix applied. +- For MongoDB: `2025-09-22 14:29:30.000` without the fix, `2025-09-22T14:29:30.000` with the fix applied. + +Note that MySQL has never been affected by this issue, and thus behaves the same regardless of the option used. + +### `versioned_bucket_ids` + +Sync Streams define streams, which rows to sync are then assigned to. When you run a full defragmentation or +redeploy Sync Streams, the same bucket identifiers are re-used when processing data again. 
+ +Because the second iteration uses different checksums for the same bucket ids, clients may sync data +twice before realizing that something is off and starting from scratch. + +Applying this fix improves client-side progress estimation and is more efficient, since data would not get +downloaded twice. + +### `fixed_json_extract` + +This fixes the `json_extract` functions as well as the `->` and `->>` operators in sync streams to behave similar +to recent SQLite versions: We only split on `.` if the path starts with `$.`. + +For instance, `'json_extract({"foo.bar": "baz"}', 'foo.bar')` would evaluate to: + +1. `baz` with the option enabled. +2. `null` with the option disabled. + +### `custom_postgres_types` + +If you have custom Postgres types in your backend database schema, older versions of the PowerSync Service +would not recognize these values and sync them with the textual wire representation used by Postgres. +This is especially noticeable when defining `DOMAIN` types with e.g. a `REAL` inner type: The wrapped +`DOMAIN` type should get synced as a real value as well, but it would actually get synced as a string. + +With this fix applied: + +- `DOMAIN TYPE`s are synced as their inner type. +- Array types of custom types get parsed correctly, and sync as a JSON array. +- Custom types get parsed and synced as a JSON object containing their members. +- Ranges sync as a JSON object corresponding to the following TypeScript definition: + ```TypeScript + export type Range = + | { + lower: T | null; + upper: T | null; + lower_exclusive: boolean; + upper_exclusive: boolean; + } + | 'empty'; + ``` +- Multi-ranges sync as an array of ranges. 
diff --git a/usage/sync-streams/advanced/multiple-client-versions.mdx b/usage/sync-streams/advanced/multiple-client-versions.mdx new file mode 100644 index 00000000..988bdb5b --- /dev/null +++ b/usage/sync-streams/advanced/multiple-client-versions.mdx @@ -0,0 +1,257 @@ +--- +title: "Multiple Client Versions" +description: "In some cases, different client versions may need different output schemas." +--- + +When schema changes are additive, old clients would just ignore the new tables and columns, and no special handling is required. However, in some cases, the schema changes may be more drastic and may need separate Sync Streams based on the client version. + +To distinguish between client versions, we can pass in an additional [connection parameter](/usage/sync-streams/reference/query-syntax#connection-parameters) from the client to the PowerSync Service instance. These parameters could be used to implement different logic based on the client version, similar to [Client Parameters](/usage/sync-rules/advanced-topics/client-parameters) in Sync Rules. 
+ +Example to use different table names based on the client's `schema_version`: + +```yaml +# Client passes in: "params": {"schema_version": } +streams: + assets_v1: + query: SELECT * FROM assets AS assets_v1 + WHERE user_id = auth.user_id() + AND connection.parameter('schema_version') = '1' + + assets_v2: + query: SELECT * FROM assets AS assets_v2 + WHERE user_id = auth.user_id() + AND connection.parameter('schema_version') = '2' +``` + +## Setting Connection Parameters + +Connection parameters are set when connecting to PowerSync: + +```js +// JavaScript +await powerSync.connect(connector, { + params: { schema_version: '2' } +}); +``` + +```dart +// Dart +await database.connect( + connector: YourConnector(), + options: const SyncOptions( + params: {'schema_version': '2'}, + ), +); +``` + +```kotlin +// Kotlin +database.connect(MyConnector(), options = SyncOptions( + params = mapOf("schema_version" to JsonParam.String("2")) +)) +``` + +## Advanced Versioning Examples + +### Different Column Names + +```yaml +streams: + user_profile_v1: + query: SELECT + id, + name, + email, + created_at + FROM users + WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '1' + + user_profile_v2: + query: SELECT + id, + first_name, + last_name, + email_address, + created_timestamp + FROM users + WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '2' +``` + +### Different Table Structures + +```yaml +streams: + tasks_v1: + query: SELECT + id, + title, + description, + status, + created_at + FROM tasks + WHERE user_id = auth.user_id() + AND connection.parameter('schema_version') = '1' + + tasks_v2: + query: SELECT + id, + title, + description, + status, + priority, + due_date, + created_at, + updated_at + FROM tasks + WHERE user_id = auth.user_id() + AND connection.parameter('schema_version') = '2' +``` + +### Feature Flags + +```yaml +streams: + basic_todos: + query: SELECT id, title, completed FROM todos + WHERE user_id = auth.user_id() + + 
todos_with_attachments: + query: SELECT + id, + title, + completed, + attachment_count, + attachment_urls + FROM todos + WHERE user_id = auth.user_id() + AND connection.parameter('enable_attachments') = 'true' +``` + +### Gradual Rollout + +```yaml +streams: + stable_features: + query: SELECT * FROM stable_data + WHERE user_id = auth.user_id() + + beta_features: + query: SELECT * FROM beta_data + WHERE user_id = auth.user_id() + AND connection.parameter('beta_enabled') = 'true' + AND connection.parameter('user_tier') IN ('premium', 'enterprise') +``` + + + Handle queries based on parameters set by the client with care. The client can send any value for these parameters, so it's not a good place to do authorization. If the parameter must be authenticated, use parameters from the JWT instead. Read more: [Security consideration](/usage/sync-rules/advanced-topics/client-parameters#security-consideration) + + +## Migration Strategy + +### Phase 1: Add New Streams + +Add new streams for the new schema version while keeping the old ones: + +```yaml +streams: + # Existing streams for v1 + user_data_v1: + query: SELECT * FROM users WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '1' + + # New streams for v2 + user_data_v2: + query: SELECT * FROM users WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '2' +``` + +### Phase 2: Update Clients Gradually + +Update clients to use the new schema version: + +```js +// Update client to use new schema version +await powerSync.connect(connector, { + params: { schema_version: '2' } +}); +``` + +### Phase 3: Remove Old Streams + +Once all clients are migrated, remove the old streams: + +```yaml +streams: + # Only keep v2 streams + user_data: + query: SELECT * FROM users WHERE id = auth.user_id() +``` + +## Best Practices + +### Use Semantic Versioning + +Use semantic versioning for schema versions: + +```yaml +streams: + user_data_v1_0: + query: SELECT * FROM users WHERE id = 
auth.user_id() + AND connection.parameter('schema_version') = '1.0' + + user_data_v1_1: + query: SELECT * FROM users WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '1.1' +``` + +### Document Breaking Changes + +Clearly document what changes between versions: + +```yaml +# v1.0: Basic user data +# v1.1: Added profile picture and bio +# v2.0: Renamed fields and added preferences +streams: + user_data_v1_0: + query: SELECT id, name, email FROM users WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '1.0' + + user_data_v1_1: + query: SELECT id, name, email, profile_picture, bio FROM users WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '1.1' + + user_data_v2_0: + query: SELECT + id, + first_name, + last_name, + email_address, + avatar_url, + description, + preferences + FROM users WHERE id = auth.user_id() + AND connection.parameter('schema_version') = '2.0' +``` + +### Test Both Versions + +Ensure your backend can handle both old and new schema versions during the transition period. + +### Monitor Usage + +Track which schema versions are being used to plan your migration timeline: + +```yaml +streams: + # Add a stream to track schema version usage + schema_version_usage: + query: SELECT + connection.parameter('schema_version') as version, + auth.user_id() as user_id, + '2024-01-01 00:00:00' as timestamp + FROM users + WHERE auth.user_id() IS NOT NULL +``` diff --git a/usage/sync-streams/advanced/partitioned-tables.mdx b/usage/sync-streams/advanced/partitioned-tables.mdx new file mode 100644 index 00000000..2abfef3d --- /dev/null +++ b/usage/sync-streams/advanced/partitioned-tables.mdx @@ -0,0 +1,26 @@ +--- +title: "Partitioned Tables (Postgres)" +description: "Partitioned tables and wildcard table name matching" +--- + +For partitioned tables in Postgres, each individual partition is replicated and processed using Sync Streams. 
+ +To use the same queries and same output table name for each partition, use `%` for wildcard suffix matching of the table name: +```yaml +streams: + by_user: + # Use wildcard in a stream query + query: SELECT * FROM "todos_%" AS todos WHERE user_id = auth.user_id() +``` + +The wildcard character can only be used as the last character in the table name. + +When using wildcard table names, the original table suffix is available in the special `_table_suffix` column: + +```sql +SELECT * FROM "todos_%" AS todos WHERE _table_suffix != 'archived' +``` + +When no table alias is provided, the original table name is preserved. + +`publish_via_partition_root` on the publication is not supported — the individual partitions must be published. diff --git a/usage/sync-streams/advanced/schemas-and-connections.mdx b/usage/sync-streams/advanced/schemas-and-connections.mdx new file mode 100644 index 00000000..f854af58 --- /dev/null +++ b/usage/sync-streams/advanced/schemas-and-connections.mdx @@ -0,0 +1,37 @@ +--- +title: "Schemas and Connections" +--- + +## Schemas (Postgres) + +When no schema is specified, the Postgres `public` schema is used for every query. A different schema can be specified as a prefix: + +```sql +-- Note: the schema must be in double quotes +SELECT * FROM "other"."assets" +``` + +## High Availability / Replicated Databases (Postgres) + +When the source Postgres database is replicated, for example with Amazon RDS Multi-AZ deployments, specify a single connection with multiple host endpoints. Each host endpoint will be tried in sequence, with the first available primary connection being used. + +For this, each endpoint must point to the same physical database, with the same replication slots. This is the case when block-level replication is used between the databases, but not when streaming physical or logical replication is used. In those cases, replication slots are unique on each host, and all data would be re-synced in a fail-over event. 
+ +## Multiple Separate Database Connections (Planned) + + + This feature will be available in a future release. See this [item on our roadmap](https://roadmap.powersync.com/c/84-support-for-sharding-multiple-database-connections). + + +In the future, it will be possible to configure PowerSync with multiple separate backend database connections, where each connection is concurrently replicated. + +You should not add multiple connections to multiple replicas of the same database — this would cause data duplication. Only use this when the data on each connection does not overlap. + +It will be possible for each connection to be configured with a "tag", to distinguish these connections in Sync Streams. The same tag may be used for multiple connections (if the schema is the same in each). + +By default, queries will reference the "default" tag. To use a different connection or connections, assign a different tag, and specify it in the query as a schema prefix. In this case, the schema itself must also be specified. + +```sql +-- Note the usage of quotes here +SELECT * FROM "secondconnection.public"."assets" +``` diff --git a/usage/sync-streams/advanced/shared-databases.mdx b/usage/sync-streams/advanced/shared-databases.mdx new file mode 100644 index 00000000..44098e20 --- /dev/null +++ b/usage/sync-streams/advanced/shared-databases.mdx @@ -0,0 +1,43 @@ +--- +title: "Shared Databases" +description: "Sharding is often used in backend databases to handle higher data volumes." +--- + +In the case of Postgres, PowerSync cannot replicate Postgres [foreign tables](https://www.postgresql.org/docs/current/ddl-foreign-data.html). + +However, PowerSync does have options available to support shared databases in general. + + + When using MongoDB or MySQL as the backend source database, PowerSync does not currently support connecting to sharded clusters. + + +The primary options are: + +1. Use a separate PowerSync Service instance per database. +2. 
Add a connection for each database in the same PowerSync Service instance ([planned](https://roadmap.powersync.com/c/84-support-for-sharding-multiple-database-connections); this capability will be available in a future release). + +Where feasible, using separate PowerSync Service instances would give better performance and give more control over how changes are rolled out, especially around Sync Stream reprocessing. + +Some specific scenarios: + +#### 1\. Different tables on different databases + +This is common when separate "services" use separate databases, but multiple tables across those databases need to be synced to the same users. + +Use a single PowerSync Service instance, with a separate connection for each source database ([planned](https://roadmap.powersync.com/c/84-support-for-sharding-multiple-database-connections); this capability will be available in a future release). Use a unique [connection tag](/usage/sync-streams/advanced/schemas-and-connections) for each source database, allowing them to be distinguished in the Sync Streams. + +#### 2a. All data for any single customer is contained in a single shard + +This is common when sharding per customer account / organization. + +In this case, use a separate PowerSync Service instance for each database. + +#### 2b. Most customer data is in a single shard, but some data is in a shared database + +If the amount of shared data is small, still use a separate PowerSync Service instance for each database, but also add the shared database connection to each PowerSync Service instance using a separate connection tag ([planned](https://roadmap.powersync.com/c/84-support-for-sharding-multiple-database-connections); this capability will be available in a future release). + +#### 3\. Only some tables are sharded + +In some cases, most tables would be on a shared server, with only a few large tables being sharded. + +For this case, use a single PowerSync Service instance. 
Add each shard as a new connection on this instance ([planned](https://roadmap.powersync.com/c/84-support-for-sharding-multiple-database-connections); this capability will be available in a future release) — all with the same connection tag, so that the same Sync Streams apply to each.
diff --git a/usage/sync-streams/glossary.mdx b/usage/sync-streams/glossary.mdx
new file mode 100644
index 00000000..030375bf
--- /dev/null
+++ b/usage/sync-streams/glossary.mdx
@@ -0,0 +1,158 @@
+---
+title: "Glossary"
+description: "Key terms and concepts used in Sync Streams."
+---
+
+## Sync Stream / Stream Definition
+
+A **Sync Stream** is a way of defining a subset of data that you'd like to sync. A Sync Stream is defined with a Query that determines what data from the backend database will be synced. The Query can reference Parameters (these replace the legacy use of `request` in the syntax):
+- **Subscription Parameters**: These are passed from the client when the Sync Stream Subscription is created.
+  - Use `subscription.parameters()` and `subscription.parameter('name')`
+- **Connection Parameters**: Specified on the connection-level. Legacy Client Parameters will be migrated to become Connection Parameters.
+  - Use `connection.parameters()` and `connection.parameter('name')`
+- **Auth Parameters**: Claims from the JWT.
+  - Use `auth.parameter('name')`, `auth.user_id()` and `auth.jwt()` in the syntax
+
+Sync Streams can define a Priority. This maps to the sync priorities in the legacy Sync Rules system.
+
+```yaml
+streams:
+  user_todos:
+    query: SELECT * FROM todos WHERE owner_id = auth.user_id()
+    auto_subscribe: true
+    priority: 1
+```
+
+## Sync Stream Subscription
+
+The client app **Subscribes** to Sync Streams, resulting in **Sync Stream Subscriptions** being created. A client can Subscribe to the same Sync Stream multiple times with different Parameters.
+ +- When `auto_subscribe: true` is set on the Sync Stream definition, all clients will be automatically subscribed to those Sync Streams (this will be used as a migration path for legacy Sync Rules to Sync Streams). +- When subscribing, the client can specify a TTL for the subscription, to achieve temporary caching-like behavior. +- The client can also explicitly Unsubscribe from Sync Streams that were subscribed to. + +```js +// Subscribe to a stream with parameters +const sub = await powerSync.syncStream('project_issues', {project_id: 'proj1'}).subscribe(); + +// Subscribe with TTL for caching behavior +const subWithTTL = await powerSync.syncStream('issues', {id: 'issue-123'}) + .subscribe({ttl: 3600}); + +// Unsubscribe when done +sub.unsubscribe(); +``` + +## Subscription Parameters + +**Subscription Parameters** are passed from the client when the Sync Stream Subscription is created. Use `subscription.parameters()` and `subscription.parameter('name')` in your queries. + +```yaml +streams: + project_issues: + query: SELECT * FROM issues WHERE project_id = subscription.parameter('project_id') +``` + +## Auth Parameters + +**Auth Parameters** are claims from the JWT. Use `auth.parameter('name')`, `auth.user_id()` and `auth.jwt()` in the syntax. + +```yaml +streams: + user_todos: + query: SELECT * FROM todos WHERE owner_id = auth.user_id() +``` + +## Connection Parameters + +**Connection Parameters** are specified on the connection-level. Legacy Client Parameters will be migrated to become Connection Parameters. Use `connection.parameters()` and `connection.parameter('name')`. + +```yaml +streams: + org_data: + query: SELECT * FROM projects WHERE org_id = connection.parameter('org_id') +``` + +## Auto-Subscribe + +**Auto-subscribe** streams are automatically subscribed to when a client connects, similar to how Sync Rules work. This means the data syncs upfront when the client connects. This is used as a migration path for legacy Sync Rules to Sync Streams. 
+ +```yaml +streams: + # Global data - synced to all users upfront + global_categories: + query: SELECT * FROM categories + auto_subscribe: true + + # User-specific data - synced upfront for each user + user_todos: + query: SELECT * FROM todos WHERE owner_id = auth.user_id() + auto_subscribe: true +``` + +## Priority + +**Priority** maps to the sync priorities in the legacy Sync Rules system. Higher priority streams are synced before lower priority ones. + +```yaml +streams: + critical_data: + query: SELECT * FROM critical_table + priority: 1 # High priority + + background_data: + query: SELECT * FROM background_table + priority: 10 # Lower priority +``` + +## TTL (Time-to-Live) + +**TTL** determines how long data remains cached after a client unsubscribes from a stream. When subscribing, the client can specify a TTL for the subscription, to achieve temporary caching-like behavior for recently accessed data. + +```js +// Subscribe with 1 hour TTL +const sub = await powerSync.syncStream('issues', {id: 'issue-123'}) + .subscribe({ttl: 3600}); +``` + +## Bucket + +A **bucket** is a collection of rows from one or more tables that are grouped together for syncing. Each bucket has a unique identifier and contains all rows that match its parameters. + + + Buckets are an implementation detail of how Sync Streams work internally. You don't need to think about buckets when using Sync Streams, but understanding them can help with performance optimization. + + +## Bucket Parameters + +**Bucket parameters** are the set of values that uniquely identify a bucket. In Sync Streams, these come from subscription parameters, auth parameters, or connection parameters. + +## Subqueries + +**Subqueries** are queries within queries, using `IN (subquery)` syntax. Sync Streams support limited subqueries, in the form of `WHERE IN (SELECT …)`. 
+ +```yaml +streams: + user_post_comments: + query: SELECT * FROM comments WHERE post_id IN ( + SELECT id FROM posts WHERE author_id = auth.user_id() + ) +``` + +## Global Data + +**Global data** is data that should be synced to all users, such as categories, settings, or reference data. In Sync Streams, this is achieved using queries that don't filter by user (no `auth.user_id()` clause). You can use `auto_subscribe: true` to sync global data upfront, or subscribe to it on-demand. + +## User-Specific Data + +**User-specific data** is data that should only be synced to the user who owns it. This is typically achieved using `auth.user_id()` in the stream query. + +## On-Demand Data + +**On-demand data** is data that is only synced when specifically requested by the client, typically using subscription parameters. + +## Bucket Limit + +The **bucket limit** is a constraint of 1,000 buckets per user. This limit exists to ensure good performance and should be considered when designing your streams. + diff --git a/usage/sync-streams/quick-start.mdx b/usage/sync-streams/quick-start.mdx new file mode 100644 index 00000000..dfc39565 --- /dev/null +++ b/usage/sync-streams/quick-start.mdx @@ -0,0 +1,228 @@ +--- +title: "Quick Start" +description: "Sync Streams enable partial sync: sync only the data users need, when they need it. Learn how to use Sync Streams in this guide." +--- + +## Introduction + +In typical cloud-first apps, your backend database holds all your data, which is made available to users via an API. This approach requires data to be _fetched_ by client apps through API calls. + +By contrast, a sync engine like PowerSync _syncs_ all data relevant to a specific user to a local database embedded in the client-side app, allowing queries to run locally on the user's device. Data is synced through the [PowerSync Service](/architecture/powersync-service), a backend service that client apps connect to. 
+ +Users are typically only authorized to access certain data in the backend database, and on top of that, there is typically too much data to sync to every user's device. Therefore, it's necessary to control which data is synced to users. Sync Streams enable **partial sync** - the ability to control which data gets synced to which users. + +For a deeper understanding of how partial sync works, see the [Under the Hood](/usage/sync-streams/under-the-hood) page. + +## Sync Streams vs Sync Rules + +PowerSync originally used **[Sync Rules](/usage/sync-rules)** for partial sync, but we've introduced **Sync Streams** as a more flexible and developer-friendly approach. + + + + **Sync Rules** were designed for offline-first mobile apps where you want to "sync everything upfront" when the client connects, so that data is available locally if a user goes offline at any point. This approach works well for mobile apps where users expect to work offline and need all their data available immediately. + + However, many developers are building web apps where users are mostly online and you don't want to make users wait to sync a lot of data upfront. In these cases, it's more suited to sync data on-demand as users navigate through your application. + + Sync engines like PowerSync are still great for these online web app use cases, because they provide you with real-time updates, simplified state management, and ease of working with data locally. + + Client Parameters in the current Sync Rules system support on-demand syncing across different browser tabs to some extent: For example, using a `project_ids` array as a Client Parameter to sync only specific projects. However, manually managing these arrays across different browser tabs becomes quite painful, especially when you need to coordinate which projects are being viewed in which tabs. 
+ + We are introducing **Sync Streams** to provide the best of both worlds: support for dynamic on-demand syncing, as well as "syncing everything upfront". + + **Key improvements in Sync Streams over Sync Rules include:** + + 1. **On-demand syncing**: You define Sync Streams on the PowerSync Service, and a client can then subscribe to them one or more times with different parameters. This means you can load data exactly when you need it, rather than syncing everything upfront. + + 2. **Temporary caching-like behavior**: Each subscription includes a configurable TTL that keeps data active after your app unsubscribes, acting as a warm cache for recently accessed data. This gives you the performance benefits of local data without the complexity of managing what's synced. + + 3. **Simpler developer experience**: Simplified syntax and mental model, and capabilities such as your UI components automatically managing subscriptions (for example, React hooks). This reduces the cognitive load of understanding when and how data gets synced. + + If you want "sync everything upfront" behavior (like the current Sync Rules system), that's easy too: you can configure any of your Sync Streams to be auto-subscribed by the client on connecting. 
+ + + + If you're familiar with Sync Rules, here's how the key concepts map to Sync Streams: + + | Sync Rules Concept | Sync Streams Equivalent | + |-------------------|------------------------| + | [Bucket definitions](/usage/sync-rules) | Stream definitions | + | [Parameter queries + Data queries](/usage/sync-rules/parameter-queries) | Single unified `query` with subqueries | + | `request.user_id()` | `auth.user_id()` | + | `request.jwt()` | `auth.parameters()` | + | [`request.parameters()` (client params)](/usage/sync-rules/advanced-topics/client-parameters) | `connection.parameters()` (global) or `subscription.parameters()` (per-subscription) | + | Global buckets | todo | + | `bucket.param` | todo | + + + todo + + + + +## Requirements + +- v1.15.0 of the PowerSync Service (Cloud instances are already on this version) +- Minimum SDK versions: + * JS: + * Web: v1.27.0 + * React Native: v1.25.0 + * React hooks: v1.8.0 + * Dart: v1.16.0 + * Kotlin: v1.7.0 + * Swift: Coming soon. +- Use of the [Rust-based sync client](https://releases.powersync.com/announcements/improved-sync-performance-in-our-client-sdks) +- Sync Stream definitions with `edition: 2` [configuration](/usage/sync-streams/advanced/compatibility). These are located in `sync_streams.yaml` (PowerSync Cloud) or `config.yaml` (Open Edition/self-hosted). To enable Sync Streams, add the following configuration: + +```yaml sync_streams.yaml +config: + # see https://docs.powersync.com/usage/sync-rules/compatibility + # this edition also deploys several backwards-incompatible fixes + # see the docs for details + edition: 2 + +streams: + ... # see 'Stream Definition Syntax' section below +``` +## Stream Definition Syntax + +You specify **stream definitions** in your `sync_streams.yaml` file. Clients then subscribe to the defined streams one or more times, with different parameters. 
+ +```yaml sync_streams.yaml +config: + edition: 2 + +streams: + : + query: string # SQL-like query with subquery support + auto_subscribe: boolean # true to subscribe by default (like Sync Rules), false (default) for on-demand + priority: number # sync priority + accept_potentially_dangerous_queries: boolean # silence warnings on dangerous queries +``` + +Basic example: + +```yaml sync_streams.yaml +config: + edition: 2 + +streams: + issue: # Syncs a specific issue by ID when requested + query: select * from issues where id = subscription.parameters() ->> 'id' + issue_comments: # Syncs all comments for a specific issue when requested + query: select * from comments where issue_id = subscription.parameters() ->> 'id' +``` + +### Real-World Examples + +#### Global Data vs User-Specific Data + +**Global data** (synced to all users by default): +```yaml +streams: + all_categories: + query: SELECT * FROM categories + auto_subscribe: true # Syncs all categories to every user when they connect +``` + +**User-specific data** (synced based on user ID): +```yaml +streams: + user_todos: + query: SELECT * FROM todos WHERE owner_id = auth.user_id() + auto_subscribe: true # Syncs all todos owned by the current user when they connect +``` + +#### Todolist App + +```yaml +streams: + # Global categories + categories: + query: SELECT * FROM categories + auto_subscribe: true # Syncs all categories to every user when they connect + + # User's lists + user_lists: + query: SELECT * FROM lists WHERE owner_id = auth.user_id() + auto_subscribe: true # Syncs all lists owned by the current user when they connect + + # Todos for a specific list (on-demand) + list_todos: + query: SELECT * FROM todos WHERE list_id = subscription.parameter('list_id') # Syncs todos for a specific list when requested +``` + +#### Chat App + +```yaml +streams: + # todo +``` + +#### Social Feed + +```yaml +streams: + # todo +``` + +#### Workout Tracker + +```yaml +streams: + # todo +``` + +#### Issue Tracker (Web) 
+ +```yaml +streams: + # todo +``` + +## Using Parameters + +Sync Streams support three types of parameters: + +- **Subscription parameters**: `subscription.parameter('key')` - per-subscription, recommended for on-demand data +- **Auth parameters**: `auth.user_id()` and `auth.parameters()` - from JWT token +- **Connection parameters**: `connection.parameter('key')` - global client parameters + +For detailed information on parameters, see the [Query Syntax](/usage/sync-streams/reference/query-syntax) reference. + +## Using Streams on the Client + +Each SDK lets you: + +* Use `db.syncStream(name, [subscription-params])` to get a `SyncStream` instance. +* Call `subscribe()` on a `SyncStream` to get a `SyncStreamSubscription`. +* Inspect `SyncStatus` for download progress and subscription information. +* Configure TTL (time-to-live) for caching behavior after unsubscribe. + +For specific SDK details and examples, see the [Client SDK References](/client-sdk-references/introduction). + +## Developer Notes + +### Troubleshooting +Use the [Sync Diagnostics Client](/usage/tools/diagnostic-app) to troubleshoot sync issues. + +### SQL Limitations +Sync Streams use SQL-like syntax, but not all SQL is supported. See the [Operators and Functions](/usage/sync-streams/reference/operators-and-functions) page for details. + +### Joins +Joins are not fully supported. See the [Many-to-Many and Join Tables](/usage/sync-streams/reference/joins) guide for workarounds. + +### Type Conversion +Most database types are converted to text on the client. See the [Types](/usage/sync-streams/reference/types) page for the complete mapping. + +### Primary Key Requirements +For each table, PowerSync requires a single primary key column called `id`, of type `text`. MongoDB uses `_id`. See the [Client Primary Key](/usage/sync-streams/reference/client-primary-key) page for details. + +### Case Sensitivity +We recommend using only lower case identifiers for all table/collection and column/field names. 
If you need to use a different case, see the [Case Sensitivity](/usage/sync-streams/reference/case-sensitivity) page. + +### Bucket Limit +Sync Streams use sync buckets under the hood, and there's a 1,000 bucket limit. See the [Under the Hood](/usage/sync-streams/under-the-hood) page for details on how to estimate your bucket usage. + +## Next Steps +- Learn how to write more advanced stream definitions using the [Query Syntax](/usage/sync-streams/reference/query-syntax) reference +- Check out the [Client SDK References](/client-sdk-references/introduction) for your platform to use Sync Streams in your client app diff --git a/usage/sync-streams/reference/case-sensitivity.mdx b/usage/sync-streams/reference/case-sensitivity.mdx new file mode 100644 index 00000000..d7236fa8 --- /dev/null +++ b/usage/sync-streams/reference/case-sensitivity.mdx @@ -0,0 +1,43 @@ +--- +title: "Case Sensitivity" +description: "For simplicity, we recommend using only lower case identifiers for all table/collection and column/field names used in PowerSync. If you need to use a different case, continue reading." +--- + +### Case in Sync Streams + +PowerSync converts all table/collection and column/field names to lower-case by default in Sync Stream queries (this is how Postgres also behaves). To preserve the case, surround the names with double quotes, for example: + +```sql +SELECT "ID" as id, "Description", "ListID" FROM "TODOs" WHERE "TODOs"."ListID" = subscription.parameter('list_id') +``` + +When using `SELECT *`, the original case is preserved for the returned columns/fields. + +### Client-Side Case + +On the client side, the case of table and column names in the [client-side schema](/installation/client-side-setup/define-your-schema) must match the case produced by Sync Streams exactly. For the above example, use the following in Dart: + +```dart + Table('TODOs', [ + Column.text('Description'), + Column.text('ListID') + ]) +``` + +SQLite itself is case-insensitive. 
When querying and modifying the data on the client, any case may be used. For example, the above table may be queried using `SELECT description FROM todos WHERE listid = ?`. + +Operations (`PUT`/`PATCH`/`DELETE`) are stored in the upload queue using the case as defined in the schema above for table and column names, not the case used in queries. + +As another example, in this Sync Stream query: + +```sql +SELECT ID, todo_description as Description FROM todo_items as TODOs +``` + +Each identifier in the example is unquoted and converted to lower case. That means the client-side schema would be: + +```dart +Table('todos', [ + Column.text('description') +]) +``` diff --git a/usage/sync-streams/reference/client-primary-key.mdx b/usage/sync-streams/reference/client-primary-key.mdx new file mode 100644 index 00000000..1c76f7de --- /dev/null +++ b/usage/sync-streams/reference/client-primary-key.mdx @@ -0,0 +1,64 @@ +--- +title: "Client Primary Key" +description: "On the client, PowerSync only supports a single primary key column called `id`, of type `text`." +--- + +For tables where the client will create new rows: + +- Postgres and MySQL: use a UUID for `id`. Use the `uuid()` helper to generate a random UUID (v4) on the client. +- MongoDB: use an `ObjectId` for `_id`. Generate an `ObjectId()` in your app code and store it in the client's `id` column as a string; this will map to MongoDB's `_id`. + +To use a different column/field from the server-side database as the record ID on the client, use a column/field alias in your Sync Streams: + +```sql +SELECT client_id as id FROM my_data +``` + + + MongoDB uses `_id` as the name of the ID field in collections. Therefore, PowerSync requires using `SELECT _id as id` in [Sync Streams](/usage/sync-streams/quick-start) when using MongoDB as the backend source database. When inserting new documents from the client, prefer `ObjectId` values for `_id` (stored in the client's `id` column). 
+ + +Custom transformations could also be used for the ID, for example: + +```sql +-- Concatenate multiple columns into a single id column +SELECT org_id || '.' || record_id as id FROM my_data +``` + + + If you want to upload data to a table with a custom record ID, ensure that `uploadData()` isn't blindly using a field named `id` when handling CRUD operations. See the [Sequential ID mapping tutorial](/tutorials/client/data/sequential-id-mapping#update-client-to-use-uuids) for an example where the record ID is aliased to `uuid` on the backend. + + +PowerSync does not perform any validation that IDs are unique. Duplicate IDs on a client could occur in any of these scenarios: + +1. A non-unique column is used for the ID. +2. Multiple table partitions are used (Postgres), with the same ID present in different partitions. +3. Multiple data queries returning the same record. This is typically not an issue if the queries return the same values (same transformations used in each query). + +We recommend using a unique index on the fields in the source database to ensure uniqueness — this will prevent (1) at least. + +If the client does sync multiple records with the same ID, only one will be present in the final database. This would typically be the one modified last, but this is subject to change — do not depend on any specific record being picked. + +### Postgres: Strategies for Auto-Incrementing IDs + +With auto-incrementing / sequential IDs (e.g. `sequence` type in Postgres), the issue is that the ID can only be generated on the server, and not on the client while offline. If this _must_ be used, there are some options, depending on the use case. + +#### Option 1: Generate ID when server receives record + +If the client does not use the ID as a reference (foreign key) elsewhere, insert any unique value on the client in the `id` field, then generate a new ID when the server receives it. 
+ +#### Option 2: Pre-create records on the server + +For some use cases, it could work to have the server pre-create a set of e.g. 100 draft records for each user. While offline, the client can populate these records without needing to generate new IDs. This is similar to providing an employee with a paper book of blank invoices — each with an invoice number pre-printed. + +This does mean that a user has a limit on how many records can be populated while offline. + +Care must be taken if a user can populate the same records from different devices while offline — ideally each device must have a unique set of pre-created records. + +#### Option 3: Use an ID mapping + +Use UUIDs on the client, then map them to sequential IDs when performing an update on the server. This allows using a sequential primary key for each record, with a UUID as a secondary ID. + +This mapping must be performed wherever the UUIDs are referenced, including for every foreign key column. + +For more information, have a look at the [Sequential ID Mapping tutorial](/tutorials/client/data/sequential-id-mapping). diff --git a/usage/sync-streams/reference/joins.mdx b/usage/sync-streams/reference/joins.mdx new file mode 100644 index 00000000..cbc9e7ca --- /dev/null +++ b/usage/sync-streams/reference/joins.mdx @@ -0,0 +1,114 @@ +--- +title: "Many-to-Many and Join Tables" +description: "Strategies for handling many-to-many relationships and join tables in Sync Streams." +--- + +Join tables are often used to implement many-to-many relationships between tables. While Sync Streams support limited subqueries, they don't support full JOIN operations. This guide contains recommended strategies for handling these relationships. + +## Example Schema + +As an example, consider a social media application. The app has message boards. Each user can subscribe to boards, make posts, and comment on posts. Posts may also have one or more topics. 
+ + + + + + +```sql +create table users ( + id uuid not null default gen_random_uuid (), + name text not null, + last_activity timestamp with time zone, + constraint users_pkey primary key (id) +); + +create table boards ( + id uuid not null default gen_random_uuid (), + name text not null, + constraint boards_pkey primary key (id) + ); + +create table posts ( + id uuid not null default gen_random_uuid (), + board_id uuid not null, + created_at timestamp with time zone not null default now(), + author_id uuid not null, + title text not null, + body text not null, + constraint posts_pkey primary key (id), + constraint posts_author_id_fkey foreign key (author_id) references users (id), + constraint posts_board_id_fkey foreign key (board_id) references boards (id) + ); + +create table comments ( + id uuid not null default gen_random_uuid (), + post_id uuid not null, + created_at timestamp with time zone not null, + author_id uuid not null, + body text not null, + constraint comments_pkey primary key (id), + constraint comments_author_id_fkey foreign key (author_id) references users (id), + constraint comments_post_id_fkey foreign key (post_id) references posts (id) + ); + +create table board_subscriptions ( + id uuid not null default gen_random_uuid (), + user_id uuid not null, + board_id uuid not null, + constraint board_subscriptions_pkey primary key (id), + constraint board_subscriptions_board_id_fkey foreign key (board_id) references boards (id), + constraint board_subscriptions_user_id_fkey foreign key (user_id) references users (id) + ); + +create table topics ( + id uuid not null default gen_random_uuid (), + label text not null + ); + +create table post_topics ( + id uuid not null default gen_random_uuid (), + board_id uuid not null, + post_id uuid not null, + topic_id uuid not null, + constraint post_topics_pkey primary key (id), + constraint post_topics_board_id_fkey foreign key (board_id) references boards (id), + constraint post_topics_post_id_fkey foreign 
key (post_id) references posts (id), + constraint post_topics_topic_id_fkey foreign key (topic_id) references topics (id) + ); +``` + + +## Strategy 1: Using Subqueries (Recommended for Sync Streams) + +Sync Streams support limited subqueries using `IN (subquery)` syntax, which provides the most flexible approach to handling many-to-many relationships. + +### User's Subscribed Boards + +For this app, we generally want to sync all posts in boards that users have subscribed to. To simplify these examples, we assume a user has to be subscribed to a board to post. + +Boards make a nice grouping of data for Sync Streams: We sync the boards that a user has subscribed to, and the same board data is synced to all users subscribed to that board. + +The relationship between users and boards is a many-to-many, specified via the `board_subscriptions` table. + +```yaml +streams: + # Get boards the user is subscribed to + user_boards: + query: SELECT * FROM boards WHERE id IN ( + SELECT board_id FROM board_subscriptions WHERE user_id = auth.user_id() + ) # Syncs all boards the user is subscribed to when they connect + auto_subscribe: true + + # Get posts from user's subscribed boards + user_board_posts: + query: SELECT * FROM posts WHERE board_id IN ( + SELECT board_id FROM board_subscriptions WHERE user_id = auth.user_id() + ) # Syncs all posts from boards the user is subscribed to when they connect + auto_subscribe: true +``` + +### Board Comments + +todo \ No newline at end of file diff --git a/usage/sync-streams/reference/operators-and-functions.mdx b/usage/sync-streams/reference/operators-and-functions.mdx new file mode 100644 index 00000000..ac414df5 --- /dev/null +++ b/usage/sync-streams/reference/operators-and-functions.mdx @@ -0,0 +1,58 @@ +--- +title: "Operators and Functions" +description: "Operators and functions can be used to transform columns/fields before being synced to a client." 
+--- + +When filtering on parameters in Sync Streams definitions, operators can only be used in a limited way. Typically only `=` , `IN` and `IS NULL` are allowed on the parameters, and special limits apply when combining clauses with `AND`, `OR` or `NOT`. + +When transforming output columns/fields, or filtering on row/document values, those restrictions do not apply. + +If a specific operator or function is needed, please [contact us](/resources/contact-us) so that we can consider inclusion in our roadmap. + +Some fundamental restrictions on these operators and functions are: + +1. It must be deterministic — no random or time-based functions. +2. No external state can be used. +3. It must operate on data available within a single row/document. For example, no aggregation functions allowed. + +## Operators + +| Operator | Notes | +| ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| Comparison:
`= != < > <= >=` | If either parameter is null, this evaluates to null. | +| Null:
`IS NULL`, `IS NOT NULL` | | +| Mathematical:
`+ - * /` | | +| Logical:
`AND`, `OR`, `NOT` | | +| Cast:
`CAST(x AS type)`
`x :: type` | Cast to text, numeric, integer, real or blob. | +| JSON:
`json -> 'path'`
`json ->> 'path'` | `->` Returns the value as a JSON string.
`->>` Returns the extracted value. | +| Text concatenation:
`\|\|` | Joins two text values together. | +| Arrays:
` IN ` | Returns true if the `left` value is present in the `right` JSON array.
Differs from the SQLite operator in that it can be used directly on a JSON array. | + +## Functions + +| Function | Description | +| -------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| [upper(text)](https://www.sqlite.org/lang_corefunc.html#upper) | Convert text to upper case. | +| [lower(text)](https://www.sqlite.org/lang_corefunc.html#lower) | Convert text to lower case. | +| [substring(text, start, length)](https://sqlite.org/lang_corefunc.html#substr) | Extracts a portion of a string based on specified start index and length. Start index is 1-based. Example: `substring(created_at, 1, 10)` returns the date portion of the timestamp. | +| [hex(data)](https://www.sqlite.org/lang_corefunc.html#hex) | Convert blob or text data to hexadecimal text. | +| base64(data) | Convert blob or text data to base64 text. | +| [length(data)](https://www.sqlite.org/lang_corefunc.html#length) | For text, return the number of characters. For blob, return the number of bytes. For null, return null. For integer and real, convert to text and return the number of characters. | +| [typeof(data)](https://www.sqlite.org/lang_corefunc.html#typeof) | text, integer, real, blob or null | +| [json\_each(data)](https://www.sqlite.org/json1.html#jeach) | Expands a JSON array or object from a request or token parameter into a set of parameter rows. 
Example: `SELECT value as project_id FROM json_each(auth.parameters() -> 'project_ids')` |
+| [json\_extract(data, path)](https://www.sqlite.org/json1.html#jex) | Same as `->>` operator, but the path must start with `$.` |
+| [json\_array\_length(data)](https://www.sqlite.org/json1.html#jarraylen) | Given a JSON array (as text), returns the length of the array. If data is null, returns null. If the value is not a JSON array, returns 0. |
+| [json\_valid(data)](https://www.sqlite.org/json1.html#jvalid) | Returns 1 if the data can be parsed as JSON, 0 otherwise. |
+| json\_keys(data) | Returns the set of keys of a JSON object as a JSON array. Example: `select * from items where user_id in json_keys(permissions_json)` |
+| [ifnull(x,y)](https://www.sqlite.org/lang_corefunc.html#ifnull) | Returns x if non-null, otherwise returns y. |
+| [iif(x,y,z)](https://www.sqlite.org/lang_corefunc.html#iif) | Returns y if x is true, otherwise returns z. |
+| [uuid_blob(id)](https://sqlite.org/src/file/ext/misc/uuid.c) | Convert a UUID string to bytes. |
+| [unixepoch(datetime, \[modifier\])](https://www.sqlite.org/lang_datefunc.html) | Returns a datetime as Unix timestamp. If modifier is "subsec", the result is a floating point number, with milliseconds included in the fraction. The datetime argument is required - this function cannot be used to get the current time. |
+| [datetime(datetime, \[modifier\])](https://www.sqlite.org/lang_datefunc.html) | Returns a datetime as a datetime string, in the format YYYY-MM-DD HH:MM:SS. If the specifier is "subsec", milliseconds are also included. If the modifier is "unixepoch", the argument is interpreted as a unix timestamp. Both modifiers can be included: datetime(timestamp, 'unixepoch', 'subsec'). The datetime argument is required - this function cannot be used to get the current time. 
| +| [ST\_AsGeoJSON(geometry)](https://postgis.net/docs/ST_AsGeoJSON.html) | Convert [PostGIS](https://postgis.net/) (in Postgres) geometry from WKB to GeoJSON. Combine with JSON operators to extract specific fields. | +| [ST\_AsText(geometry)](https://postgis.net/docs/ST_AsText.html) | Convert [PostGIS](https://postgis.net/) (in Postgres) geometry from WKB to Well-Known Text (WKT). | +| [ST\_X(point)](https://postgis.net/docs/ST_X.html) | Get the X coordinate of a [PostGIS](https://postgis.net/) point (in Postgres) | +| [ST\_Y(point)](https://postgis.net/docs/ST_Y.html) | Get the Y coordinate of a [PostGIS](https://postgis.net/) point (in Postgres) | + + +Most of these functions are based on the [built-in SQLite functions](https://www.sqlite.org/lang_corefunc.html) and [SQLite JSON functions](https://www.sqlite.org/json1.html). diff --git a/usage/sync-streams/reference/query-syntax.mdx b/usage/sync-streams/reference/query-syntax.mdx new file mode 100644 index 00000000..30d5d229 --- /dev/null +++ b/usage/sync-streams/reference/query-syntax.mdx @@ -0,0 +1,291 @@ +--- +title: "Query Syntax" +description: "Guide to writing Sync Stream queries with parameters, subqueries, and data transformations." +--- + +## Parameters Overview + +Sync Streams support three types of parameters for filtering and grouping data: + +### Subscription Parameters + +**Subscription parameters** are passed from the client when it subscribes to a Sync Stream. These are the most flexible and recommended approach for on-demand syncing. +Clients can subscribe to the same stream multiple times with different parameters. 
+
+```yaml
+# Stream definition
+streams:
+  issue_details:
+    query: SELECT * FROM issues WHERE id = subscription.parameter('id') # Syncs a specific issue by ID when requested
+```
+
+```js
+// Client usage
+const sub = await powerSync.syncStream('issue_details', {id: 'issue-123'}).subscribe();
+```
+
+**Accessing subscription parameters:**
+- `subscription.parameters()` - all parameters for the subscription, as JSON
+- `subscription.parameter('key')` - shorthand for getting a single specific parameter
+
+### Auth Parameters
+
+**Auth parameters** come from the JWT authentication token. These are trusted and can be used for access control.
+
+```yaml
+streams:
+  user_todos:
+    query: SELECT * FROM todos WHERE owner_id = auth.user_id() # Syncs all todos owned by the current user
+```
+
+**Accessing auth parameters:**
+- `auth.parameters()` - entire JWT payload as JSON
+- `auth.parameter('key')` - shorthand for getting a single specific token payload parameter
+- `auth.user_id()` - shorthand for JWT subject (`auth.parameter('sub')`)
+
+### Connection Parameters
+
+**Connection parameters** are specified globally at the connection level. These are equivalent to [Client Parameters](/usage/sync-rules/advanced-topics/client-parameters) in Sync Rules. 
+ +```yaml +streams: + project_issues: + query: SELECT * FROM issues WHERE project_id = connection.parameter('project_id') # Syncs all issues for a specific project +``` + +**Accessing connection parameters:** +- `connection.parameters()` - all parameters for the connection, as JSON +- `connection.parameter('key')` - shorthand for getting a single specific parameter + +## Grouping and Filtering Data + +### User-Specific Data + +Filter data based on the authenticated user: + +```yaml +streams: + user_lists: + query: SELECT * FROM lists WHERE owner_id = auth.user_id() # Syncs all lists owned by the current user + + user_todos: + query: SELECT * FROM todos WHERE owner_id = auth.user_id() # Syncs all todos owned by the current user +``` + +### Organization-Based Data + +Filter data based on user's organization: + +```yaml +streams: + org_projects: + query: SELECT * FROM projects WHERE org_id = auth.parameter('org_id') # Syncs all projects in the user's organization + + org_members: + query: SELECT * FROM users WHERE org_id = auth.parameter('org_id') # Syncs all users in the user's organization +``` + +### Permission-Based Data + +Filter data based on user permissions: + +```yaml +streams: + accessible_boards: + query: SELECT * FROM boards WHERE id IN ( + SELECT board_id FROM board_permissions + WHERE user_id = auth.user_id() + ) # Syncs all boards the user has permission to access +``` + +### Dynamic Filtering + +Use subscription parameters for dynamic filtering: + +```yaml +streams: + posts_by_category: + query: SELECT * FROM posts + WHERE category = subscription.parameter('category') # Syncs all posts for a specific category when requested +``` + +### Simple Filtering + +Use only supported operators (`=`, `!=`, `IN`, `IS NULL`, `IS NOT NULL`) for filtering: + +```yaml +streams: + user_todos: + query: SELECT * FROM todos + WHERE user_id = auth.user_id() # Syncs all todos for the current user +``` + +**Note:** Date range filtering (like `created_at > 
subscription.parameter('since_date')`) is not supported. For date-based filtering, consider: + +todo + +## Subqueries + +**Subqueries** are queries within queries that let you filter data based on related information from other tables. Think of them as a way to "sync all X where X is related to Y that meets some condition." + +### Basic Subquery Syntax + +Sync Streams support limited subqueries using `IN (subquery)` syntax: + +```yaml +streams: + user_tasks: + query: SELECT * FROM tasks + WHERE project_id IN ( + SELECT id FROM projects WHERE owner_id = auth.user_id() + ) # Syncs only tasks from projects owned by the current user +``` + +**How this works:** +1. The inner query or subquery (`SELECT id FROM projects WHERE owner_id = auth.user_id()`) finds all project IDs owned by the current user +2. The outer query (`SELECT * FROM tasks WHERE project_id IN (...)`) gets all tasks that belong to those projects +3. Result: Users only sync tasks from projects they own + +### Basic Subqueries + +```yaml +streams: + user_posts: + query: SELECT * FROM posts WHERE author_id = auth.user_id() # Syncs all posts by the current user + + user_post_comments: + query: SELECT * FROM comments WHERE post_id IN ( + SELECT id FROM posts WHERE author_id = auth.user_id() + ) # Syncs all comments on posts by the current user +``` + +### Subqueries with Multiple Conditions + +```yaml +streams: + # todo +``` + +## Selecting Columns + +### Select All Columns + +```yaml +streams: + all_todos: + query: SELECT * FROM todos # Syncs all todos with all columns +``` + +### Select Specific Columns + +```yaml +streams: + todo_summary: + query: SELECT id, title, completed, created_at FROM todos # Syncs only specific columns from all todos +``` + +## Renaming Columns + +Use column aliases to rename columns in the output: + +```yaml +streams: + todos: + query: SELECT + id, + title, + completed as is_done, + created_timestamp AS created_at, + updated_timestamp AS updated_at + FROM todos # Syncs todos with 
renamed columns for better client-side usage +``` + +## Transforming Columns + +### Type Casting + +```yaml +streams: + todos: + query: SELECT + id, + title, + completed::text as completed_text, + CAST(priority as TEXT) as priority_text, + item_number::text AS item_number + FROM todos # Syncs todos with type-cast columns for consistent client-side data types +``` + +### JSON Field Extraction + +```yaml +streams: + todos_with_metadata: + query: SELECT + id, + title, + metadata_json ->> 'description' AS description, + metadata_json ->> 'tags' AS tags, + settings_json ->> 'notifications' AS notifications_enabled + FROM todos # Syncs todos with extracted JSON fields as separate columns +``` + +### Binary Data Conversion + +```yaml +streams: + todos_with_images: + query: SELECT + id, + title, + base64(thumbnail) AS thumbnail_base64, + hex(image_data) AS image_hex + FROM todos # Syncs todos with binary data converted to text format +``` + +### Date/Time Conversions + +```yaml +streams: + todos_with_timestamps: + query: SELECT + id, + title, + unixepoch(created_at) AS created_timestamp, + datetime(updated_at, 'unixepoch') AS updated_datetime + FROM todos # Syncs todos with converted date/time formats for client compatibility +``` + +### String Operations + +```yaml +streams: + todos_formatted: + query: SELECT + id, + upper(title) AS title_upper, + lower(description) AS description_lower, + substring(created_at, 1, 10) AS created_date + FROM todos # Syncs todos with string transformations applied +``` + +## Best Practices + +### Combine Auth and Subscription Parameters + +You can combine different parameter types in a single query for more sophisticated access control: + +```yaml +streams: + user_project_tasks: + query: SELECT * FROM tasks + WHERE project_id = subscription.parameter('project_id') + AND project_id IN ( + SELECT id FROM projects WHERE owner_id = auth.user_id() + ) # Syncs tasks from a specific project, but only if the user owns that project +``` + +This 
example demonstrates: +- **Subscription parameter**: `project_id` passed by the client when subscribing +- **Auth parameter**: `auth.user_id()` from the JWT to verify ownership +- **Security**: The subquery ensures users can only access tasks from projects they own diff --git a/usage/sync-streams/reference/types.mdx b/usage/sync-streams/reference/types.mdx new file mode 100644 index 00000000..bc074037 --- /dev/null +++ b/usage/sync-streams/reference/types.mdx @@ -0,0 +1,107 @@ +--- +title: "Types" +description: "Sync Streams use the [SQLite type system](https://www.sqlite.org/datatype3.html)." +--- + +The supported client-side SQLite types are: + +1. `null` +2. `integer`: a 64-bit signed integer +3. `real`: a 64-bit floating point number +4. `text`: An UTF-8 text string +5. `blob`: Binary data + +## Postgres Type Mapping + +Binary data in Postgres can be accessed in Sync Streams, but cannot be synced directly to clients (it needs to be converted to hex or base64 first — see below). + +Postgres values are mapped according to this table: + +| Postgres Data Type | PowerSync / SQLite Column Type | Notes | +|--------------------|--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| text, varchar | text | | +| int2, int4, int8 | integer | | +| numeric / decimal | text | These types have arbitrary precision in Postgres, so can only be represented accurately as text in SQLite | +| bool | integer | 1 for true, 0 for false | +| float4, float8 | real | | +| enum | text | | +| uuid | text | | +| timestamptz | text | Format: `YYYY-MM-DD hh:mm:ss.sssZ`. This is compatible with ISO8601 and SQLite's functions. Precision matches the precision used in Postgres. `-infinity` becomes `0000-01-01 00:00:00Z` and `infinity` becomes `9999-12-31 23:59:59Z`. 
|
+| timestamp | text | Format: `YYYY-MM-DD hh:mm:ss.sss`. In most cases, timestamptz should be used instead. `-infinity` becomes `0000-01-01 00:00:00` and `infinity` becomes `9999-12-31 23:59:59`. |
+| date, time | text | |
+| json, jsonb | text | There is no dedicated JSON type — JSON functions operate directly on text values. |
+| interval | text | |
+| macaddr | text | |
+| inet | text | |
+| bytea | blob | Cannot sync directly to client — convert to hex or base64 first. See [Operators & Functions](/usage/sync-streams/reference/operators-and-functions). |
+| geometry (PostGIS) | text | hex string of the binary data. Use the [ST functions](/usage/sync-streams/reference/operators-and-functions#functions) to convert to other formats. |
+| Arrays | text | JSON array. |
+| `DOMAIN` types | text / depends | Depending on [compatibility options](/usage/sync-streams/advanced/compatibility#custom-postgres-types), inner type or raw wire representation (legacy). |
+| Custom types | text | Depending on [compatibility options](/usage/sync-streams/advanced/compatibility#custom-postgres-types), JSON object or raw wire representation (legacy). |
+| (Multi-)ranges | text | Depending on [compatibility options](/usage/sync-streams/advanced/compatibility#custom-postgres-types), JSON object (array for multi-ranges) or raw wire representation (legacy). |
+
+There is no dedicated boolean data type. Boolean values are represented as `1` (true) or `0` (false).
+
+`json` and `jsonb` values are treated as `text` values in their serialized representation. JSON functions and operators operate directly on these `text` values. 
+
+## MongoDB Type Mapping
+
+| BSON Type | PowerSync / SQLite Column Type | Notes |
+| ------------------ | ------------------------------ | ----- |
+| String | text | |
+| Int, Long | integer | |
+| Double | real | |
+| Decimal128 | text | |
+| Object | text | Converted to a JSON string |
+| Array | text | Converted to a JSON string |
+| ObjectId | text | Lower-case hex string |
+| UUID | text | Lower-case hex string |
+| Boolean | integer | 1 for true, 0 for false |
+| Date | text | Format: `YYYY-MM-DD hh:mm:ss.sssZ` |
+| Null | null | |
+| Binary | blob | Cannot sync directly to client — convert to hex or base64 first. See [Operators & Functions](/usage/sync-streams/reference/operators-and-functions). |
+| Regular Expression | text | JSON text in the format `{"pattern":"...","options":"..."}` |
+| Timestamp | integer | Converted to a 64-bit integer |
+| Undefined | null | |
+| DBPointer | text | JSON text in the format `{"collection":"...","oid":"...","db":"...","fields":...}` |
+| JavaScript | text | JSON text in the format `{"code": "...", "scope": ...}` |
+| Symbol | text | |
+| MinKey, MaxKey | null | |
+
+* Data is converted to a flat list of columns, one column per top-level field in the MongoDB document.
+* Special BSON types are converted to plain SQLite alternatives.
+* For example, `ObjectId`, `Date`, `UUID` are all converted to a plain `TEXT` column.
+* Nested objects and arrays are converted to JSON strings, and JSON operators can be used to query them (in the Sync Streams and/or on the client-side).
+* Binary data nested in objects or arrays is not supported. 
+
+## MySQL (Beta) Type Mapping
+
+MySQL values are mapped according to this table:
+
+| MySQL Data Type | PowerSync / SQLite Column Type | Notes |
+|----------------------------------------------------|--------------------------------|-----------------------------------------------------------------------------------|
+| tinyint, smallint, mediumint, bigint, integer, int | integer | |
+| numeric, decimal | text | |
+| bool, boolean | integer | 1 for true, 0 for false |
+| float, double, real | real | |
+| bit | integer | |
+| enum | text | |
+| set | text | Converted to JSON array |
+| char, varchar | text | |
+| tinytext, text, mediumtext, longtext | text | |
+| timestamp | text | ISO 8601 format: `YYYY-MM-DDTHH:mm:ss.sssZ` |
+| date | text | Format: `YYYY-MM-DD` |
+| time, datetime | text | ISO 8601 format: `YYYY-MM-DDTHH:mm:ss.sssZ` |
+| year | text | |
+| json | text | There is no dedicated JSON type — JSON functions operate directly on text values. |
+| binary, varbinary | blob | See note below regarding binary types |
+| blob, tinyblob, mediumblob, longblob | blob | |
+| geometry, geometrycollection | blob | |
+| point, multipoint | blob | |
+| linestring, multilinestring | blob | |
+| polygon, multipolygon | blob | |
+
+
+  Binary data can be accessed in the Sync Streams, but before it can be synced directly to clients it needs to be converted to hex or base64 first.
+  See [Operators & Functions](/usage/sync-streams/reference/operators-and-functions)
+
diff --git a/usage/sync-streams/under-the-hood.mdx b/usage/sync-streams/under-the-hood.mdx
new file mode 100644
index 00000000..d41815cc
--- /dev/null
+++ b/usage/sync-streams/under-the-hood.mdx
+---
+title: "Under the Hood"
+description: "How Sync Streams achieve partial sync and work internally with buckets, subscriptions, and TTL."
+---
+
+## How Partial Sync Works
+
+PowerSync Sync Streams enable **partial sync** — the ability to control which data gets synced to which devices. 
This is essential for building scalable applications where you don't want to sync all data to every client. + +todo \ No newline at end of file