Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions src/export/dump.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,22 @@ beforeEach(() => {
describe('Database Dump Module', () => {
it('should return a database dump when tables exist', async () => {
vi.mocked(executeOperation)
// table list
.mockResolvedValueOnce([{ name: 'users' }, { name: 'orders' }])
// users schema
.mockResolvedValueOnce([
{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
])
// users data batch (only one batch needed)
.mockResolvedValueOnce([
{ id: 1, name: 'Alice' },
{ id: 2, name: 'Bob' },
])
// orders schema
.mockResolvedValueOnce([
{ sql: 'CREATE TABLE orders (id INTEGER, total REAL);' },
])
// orders data batch
.mockResolvedValueOnce([
{ id: 1, total: 99.99 },
{ id: 2, total: 49.5 },
Expand All @@ -71,13 +76,13 @@ describe('Database Dump Module', () => {
expect(dumpText).toContain(
'CREATE TABLE users (id INTEGER, name TEXT);'
)
expect(dumpText).toContain("INSERT INTO users VALUES (1, 'Alice');")
expect(dumpText).toContain("INSERT INTO users VALUES (2, 'Bob');")
expect(dumpText).toContain('INSERT INTO "users" VALUES (1, \'Alice\');')
expect(dumpText).toContain('INSERT INTO "users" VALUES (2, \'Bob\');')
expect(dumpText).toContain(
'CREATE TABLE orders (id INTEGER, total REAL);'
)
expect(dumpText).toContain('INSERT INTO orders VALUES (1, 99.99);')
expect(dumpText).toContain('INSERT INTO orders VALUES (2, 49.5);')
expect(dumpText).toContain('INSERT INTO "orders" VALUES (1, 99.99);')
expect(dumpText).toContain('INSERT INTO "orders" VALUES (2, 49.5);')
})

it('should handle empty databases (no tables)', async () => {
Expand All @@ -99,6 +104,7 @@ describe('Database Dump Module', () => {
.mockResolvedValueOnce([
{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
])
// empty batch (no data)
.mockResolvedValueOnce([])

const response = await dumpDatabaseRoute(mockDataSource, mockConfig)
Expand All @@ -108,7 +114,7 @@ describe('Database Dump Module', () => {
expect(dumpText).toContain(
'CREATE TABLE users (id INTEGER, name TEXT);'
)
expect(dumpText).not.toContain('INSERT INTO users VALUES')
expect(dumpText).not.toContain('INSERT INTO "users" VALUES')
})

it('should escape single quotes properly in string values', async () => {
Expand All @@ -124,10 +130,52 @@ describe('Database Dump Module', () => {
expect(response).toBeInstanceOf(Response)
const dumpText = await response.text()
expect(dumpText).toContain(
"INSERT INTO users VALUES (1, 'Alice''s adventure');"
"INSERT INTO \"users\" VALUES (1, 'Alice''s adventure');"
)
})

it('should handle NULL values', async () => {
vi.mocked(executeOperation)
.mockResolvedValueOnce([{ name: 'users' }])
.mockResolvedValueOnce([
{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
])
.mockResolvedValueOnce([{ id: 1, name: null }])

const response = await dumpDatabaseRoute(mockDataSource, mockConfig)

const dumpText = await response.text()
expect(dumpText).toContain('INSERT INTO "users" VALUES (1, NULL);')
})

it('should stream data in batches when table has many rows', async () => {
// Simulate a table with 501 rows → should trigger 2 batches (500 + 1)
const batch1 = Array.from({ length: 500 }, (_, i) => ({
id: i + 1,
name: `user${i + 1}`,
}))
const batch2 = [{ id: 501, name: 'user501' }]

vi.mocked(executeOperation)
.mockResolvedValueOnce([{ name: 'users' }])
.mockResolvedValueOnce([
{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
])
.mockResolvedValueOnce(batch1) // first batch
.mockResolvedValueOnce(batch2) // second batch

const response = await dumpDatabaseRoute(mockDataSource, mockConfig)

const dumpText = await response.text()
expect(dumpText).toContain('INSERT INTO "users" VALUES (1, \'user1\');')
expect(dumpText).toContain(
'INSERT INTO "users" VALUES (501, \'user501\');'
)

// Verify LIMIT/OFFSET was used (4 calls: table list, schema, batch1, batch2)
expect(executeOperation).toHaveBeenCalledTimes(4)
})

it('should return a 500 response when an error occurs', async () => {
const consoleErrorMock = vi
.spyOn(console, 'error')
Expand Down
186 changes: 143 additions & 43 deletions src/export/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,22 @@ import { StarbaseDBConfiguration } from '../handler'
import { DataSource } from '../types'
import { createResponse } from '../utils'

const BATCH_SIZE = 500 // Rows fetched per LIMIT/OFFSET query – caps peak memory per batch
const BREATHING_INTERVAL_MS = 10 // Pause between batches so the event loop can serve other requests

/**
* Stream a database dump using ReadableStream to avoid OOM on large databases.
*
* For each table we:
* 1. Emit the CREATE TABLE statement (schema)
* 2. Fetch rows in batches of BATCH_SIZE using LIMIT/OFFSET
* 3. Emit INSERT statements for each batch
* 4. Yield control briefly between batches so other requests aren't starved
*
* This keeps peak memory usage proportional to BATCH_SIZE rather than the
* entire database size, and prevents the 30-second Cloudflare Workers timeout
* from killing the request on large databases.
*/
export async function dumpDatabaseRoute(
dataSource: DataSource,
config: StarbaseDBConfiguration
Expand All @@ -15,57 +31,141 @@ export async function dumpDatabaseRoute(
config
)

const tables = tablesResult.map((row: any) => row.name)
let dumpContent = 'SQLite format 3\0' // SQLite file header

// Iterate through all tables
for (const table of tables) {
// Get table schema
const schemaResult = await executeOperation(
[
{
sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
},
],
dataSource,
config
)

if (schemaResult.length) {
const schema = schemaResult[0].sql
dumpContent += `\n-- Table: ${table}\n${schema};\n\n`
}

// Get table data
const dataResult = await executeOperation(
[{ sql: `SELECT * FROM ${table};` }],
dataSource,
config
)

for (const row of dataResult) {
const values = Object.values(row).map((value) =>
typeof value === 'string'
? `'${value.replace(/'/g, "''")}'`
: value
)
dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
}

dumpContent += '\n'
}

// Create a Blob from the dump content
const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' })
const tables: string[] = tablesResult.map((row: any) => row.name)

const encoder = new TextEncoder()

const stream = new ReadableStream<Uint8Array>({
async start(controller) {
try {
// SQLite file header
controller.enqueue(encoder.encode('SQLite format 3\0'))

for (const table of tables) {
// ── Schema ──────────────────────────────────────────
const schemaResult = await executeOperation(
[
{
sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name=?;`,
params: [table],
},
],
dataSource,
config
)

if (schemaResult.length) {
const schema = schemaResult[0].sql
controller.enqueue(
encoder.encode(
`\n-- Table: ${table}\n${schema};\n\n`
)
)
}

// ── Data (streamed in batches) ──────────────────────
let offset = 0
let hasMore = true

while (hasMore) {
const dataResult = await executeOperation(
[
{
sql: `SELECT * FROM ${escapeIdent(table)} LIMIT ? OFFSET ?;`,
params: [BATCH_SIZE, offset],
},
],
dataSource,
config
)

if (!dataResult || dataResult.length === 0) {
hasMore = false
break
}

// Build INSERT statements for this batch
const batchLines: string[] = []
for (const row of dataResult) {
const values = Object.values(row).map((value) =>
escapeValue(value)
)
batchLines.push(
`INSERT INTO ${escapeIdent(table)} VALUES (${values.join(', ')});`
)
}
controller.enqueue(
encoder.encode(batchLines.join('\n') + '\n')
)

offset += dataResult.length

// If we got fewer rows than requested, we're done
if (dataResult.length < BATCH_SIZE) {
hasMore = false
}

// Breathing interval – yield to the event loop so
// other in-flight requests can be served.
await sleep(BREATHING_INTERVAL_MS)
}

controller.enqueue(encoder.encode('\n'))
}

controller.close()
} catch (err) {
controller.error(err)
}
},
})

const headers = new Headers({
'Content-Type': 'application/x-sqlite3',
'Content-Disposition': 'attachment; filename="database_dump.sql"',
})

return new Response(blob, { headers })
return new Response(stream, { headers })
} catch (error: any) {
console.error('Database Dump Error:', error)
return createResponse(undefined, 'Failed to create database dump', 500)
}
}

// ── Helpers ────────────────────────────────────────────────────────────────

/**
 * Quote a table/column name as a double-quoted SQL identifier,
 * doubling any embedded double quotes. Basic, but sufficient for
 * the generated dump statements.
 */
function escapeIdent(name: string): string {
    const doubled = name.split('"').join('""')
    return '"' + doubled + '"'
}

/**
 * Format a JavaScript value as a SQL literal for generated INSERT statements.
 *
 * - `null`/`undefined` → `NULL`
 * - strings → single-quoted with embedded quotes doubled
 * - finite numbers / bigints → bare numeric literal
 * - `NaN` → `NULL`, `±Infinity` → `±1e999` (same convention the sqlite3
 *   CLI's `.dump` uses; SQLite parses `1e999` back as an infinite REAL).
 *   Previously these fell through to `String(value)` and emitted the
 *   invalid SQL tokens `NaN`/`Infinity`.
 * - booleans → `1`/`0`
 * - `Uint8Array`/`ArrayBuffer` → `X'…'` hex blob literal
 * - anything else → stringified and quoted as text
 */
function escapeValue(value: unknown): string {
    if (value === null || value === undefined) {
        return 'NULL'
    }
    if (typeof value === 'string') {
        return `'${value.replace(/'/g, "''")}'`
    }
    if (typeof value === 'number') {
        // Guard non-finite values: String(NaN) / String(Infinity) are not
        // valid SQL literals and would corrupt the dump.
        if (Number.isNaN(value)) {
            return 'NULL'
        }
        if (!Number.isFinite(value)) {
            return value > 0 ? '1e999' : '-1e999'
        }
        return String(value)
    }
    if (typeof value === 'bigint') {
        return String(value)
    }
    if (typeof value === 'boolean') {
        return value ? '1' : '0'
    }
    // ArrayBuffer / Uint8Array → hex blob literal
    if (value instanceof Uint8Array || value instanceof ArrayBuffer) {
        const bytes =
            value instanceof ArrayBuffer ? new Uint8Array(value) : value
        const hex = Array.from(bytes)
            .map((b) => b.toString(16).padStart(2, '0'))
            .join('')
        return `X'${hex}'`
    }
    // Fallback: stringify as quoted text
    return `'${String(value).replace(/'/g, "''")}'`
}

/** Resolve after `ms` milliseconds – used to yield between dump batches. */
function sleep(ms: number): Promise<void> {
    return new Promise<void>((resolve) => {
        setTimeout(resolve, ms)
    })
}