Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,7 @@ site/
*.tmp
tmp/
temp/

# Node.js (for schema bundling)
node_modules/
package-lock.json
62 changes: 62 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,68 @@ pip install pre-commit
pre-commit install
```

### Schema Synchronization

This project includes tooling to detect drift between your Pydantic models and the official [Beacon v2 JSON schemas](https://github.com/ga4gh-beacon/beacon-v2).

#### Prerequisites

```bash
npm install @apidevtools/json-schema-ref-parser
```

#### Sync and Compare

Run the sync script to download schemas and compare against your models:

```bash
# Download latest release and compare
./scripts/sync_beacon_schemas.sh

# Use a specific version
./scripts/sync_beacon_schemas.sh --version v2.1.0

# Clean cached schemas and re-download
./scripts/sync_beacon_schemas.sh --clean
```

Or run just the comparison (if schemas are already downloaded):

```bash
uv run python scripts/compare_models.py
```

#### What It Does

1. **Downloads** Beacon v2 release artifacts from GitHub
2. **Bundles** JSON schemas (resolves all `$ref` references)
3. **Compares** schema fields against `src/beacon_api/models/`
4. **Reports** missing fields, extra fields, and coverage

#### Output

The comparison script reports:
- **Missing fields** - Fields in the schema but not in your model
- **Extra fields** - Custom fields you've added (not in schema)
- **Field counts** - Coverage summary per model

Downloaded schemas are cached in `tmp/` (gitignored):

```
tmp/
├── beacon-v2-schemas/ # Downloaded release artifacts
└── bundled_schemas/ # Resolved JSON schemas
```

#### Workflow for Updating Models

1. Run `./scripts/sync_beacon_schemas.sh`
2. Review the comparison report for missing fields
3. Add missing fields to `src/beacon_api/models/` as needed
4. Re-run to verify coverage

This approach keeps you in control while ensuring your models stay aligned with the upstream specification.

## API Endpoints

### Info
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dev = [
"httpx>=0.27.0",
"ruff>=0.7.0",
"mypy>=1.13.0",
"datamodel-code-generator>=0.53.0",
]

[build-system]
Expand Down
171 changes: 171 additions & 0 deletions scripts/bundle_schemas.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env node
/**
* Bundle Beacon v2 JSON schemas for Pydantic model generation.
*
* Strategy:
* - Entity models: Copy pre-dereferenced schemas from beacon-v2/bin/deref_schemas/
* - Framework schemas: Dereference using json-schema-ref-parser with circular references ignored (these resolve correctly)
*
* Usage:
* npm install @apidevtools/json-schema-ref-parser
* node scripts/bundle_schemas.js
*/

const $RefParser = require('@apidevtools/json-schema-ref-parser');
const fs = require('fs');
const path = require('path');

// Configuration
//
// Root of the Beacon v2 schema tree. A non-empty BEACON_V2_ROOT environment
// variable takes precedence; otherwise the default location relative to this
// script is used. Either way the result is an absolute path.
const BEACON_V2_ROOT = path.resolve(
  process.env.BEACON_V2_ROOT || path.resolve(__dirname, '../tmp/beacon-v2-schemas'),
);

// Directory that receives every generated `<name>.json` file.
const OUTPUT_DIR = path.resolve(__dirname, '../tmp/bundled_schemas');

// Input locations inside the Beacon v2 tree.
const DEREF_SCHEMAS_DIR = path.join(BEACON_V2_ROOT, 'bin', 'deref_schemas');
const FRAMEWORK_DIR = path.join(BEACON_V2_ROOT, 'framework', 'json');

// Pre-dereferenced entity schemas (just copy these).
// Maps the output base name to its source file; every entity type lives in
// its own directory under DEREF_SCHEMAS_DIR and is named defaultSchema.json.
const ENTITY_SCHEMAS = Object.fromEntries(
  [
    ['individual', 'individuals'],
    ['biosample', 'biosamples'],
    ['cohort', 'cohorts'],
    ['dataset', 'datasets'],
    ['run', 'runs'],
    ['analysis', 'analyses'],
    ['genomicVariation', 'genomicVariations'],
  ].map(([schemaName, entityDir]) => [
    schemaName,
    path.join(DEREF_SCHEMAS_DIR, entityDir, 'defaultSchema.json'),
  ]),
);

// Framework schemas (need bundling).
// Each entry pairs the output base name with the path segments of its source
// file below FRAMEWORK_DIR. Order is significant only for log output.
const FRAMEWORK_SCHEMAS = Object.fromEntries(
  [
    // Common
    ['common', ['common', 'beaconCommonComponents.json']],
    ['ontologyTerm', ['common', 'ontologyTerm.json']],

    // Requests
    ['requestBody', ['requests', 'beaconRequestBody.json']],
    ['requestMeta', ['requests', 'beaconRequestMeta.json']],
    ['filteringTerms', ['requests', 'filteringTerms.json']],

    // Responses
    ['booleanResponse', ['responses', 'beaconBooleanResponse.json']],
    ['countResponse', ['responses', 'beaconCountResponse.json']],
    ['resultsetsResponse', ['responses', 'beaconResultsetsResponse.json']],
    ['collectionsResponse', ['responses', 'beaconCollectionsResponse.json']],
    ['infoResponse', ['responses', 'beaconInfoResponse.json']],
    ['errorResponse', ['responses', 'beaconErrorResponse.json']],
    ['filteringTermsResponse', ['responses', 'beaconFilteringTermsResponse.json']],
    ['mapResponse', ['responses', 'beaconMapResponse.json']],

    // Response Sections
    ['responseMeta', ['responses', 'sections', 'beaconResponseMeta.json']],
    ['resultsets', ['responses', 'sections', 'beaconResultsets.json']],
  ].map(([schemaName, segments]) => [
    schemaName,
    path.join(FRAMEWORK_DIR, ...segments),
  ]),
);

/**
 * Copy a pre-dereferenced schema file into OUTPUT_DIR as `<name>.json`.
 *
 * Never throws: a missing source file is reported as a warning, and any
 * filesystem error raised while copying is reported as an error. In both
 * cases `false` is returned so the caller can count the schema as failed
 * instead of the whole run aborting.
 *
 * @param {string} name - Output base name (without the `.json` extension).
 * @param {string} sourcePath - Path of the schema file to copy.
 * @returns {boolean} `true` if the file was copied, `false` otherwise.
 */
function copySchema(name, sourcePath) {
  console.log(`Copying ${name}...`);

  if (!fs.existsSync(sourcePath)) {
    console.warn(` WARNING: Schema file not found: ${sourcePath}`);
    return false;
  }

  try {
    const outputPath = path.join(OUTPUT_DIR, `${name}.json`);
    fs.copyFileSync(sourcePath, outputPath);

    // Report the size of what was actually written, in KB.
    const { size } = fs.statSync(outputPath);
    console.log(` -> ${outputPath} (${(size / 1024).toFixed(1)} KB)`);
    return true;
  } catch (error) {
    // Mirrors bundleSchema(): log and report failure rather than throwing.
    console.error(` ERROR copying ${name}: ${error.message}`);
    return false;
  }
}

/**
 * Resolve one framework schema with json-schema-ref-parser and write the
 * result to OUTPUT_DIR as pretty-printed `<name>.json`.
 *
 * A missing source file is logged as a warning; any error raised while
 * resolving or writing is logged as an error. Neither case throws.
 *
 * @param {string} name - Output base name (without the `.json` extension).
 * @param {string} schemaPath - Path of the schema file to resolve.
 * @returns {Promise<boolean>} `true` when the file was written, else `false`.
 */
async function bundleSchema(name, schemaPath) {
  console.log(`Bundling ${name}...`);

  const sourceExists = fs.existsSync(schemaPath);
  if (!sourceExists) {
    console.warn(` WARNING: Schema file not found: ${schemaPath}`);
    return false;
  }

  // NOTE(review): per the parser's docs, 'ignore' should leave circular
  // references unexpanded so the result stays serializable — confirm.
  const parserOptions = { dereference: { circular: 'ignore' } };

  try {
    const resolved = await $RefParser.dereference(schemaPath, parserOptions);

    const destination = path.join(OUTPUT_DIR, `${name}.json`);
    const serialized = JSON.stringify(resolved, null, 2);
    fs.writeFileSync(destination, serialized);

    const { size } = fs.statSync(destination);
    const sizeKb = (size / 1024).toFixed(1);
    console.log(` -> ${destination} (${sizeKb} KB)`);
    return true;
  } catch (error) {
    console.error(` ERROR bundling ${name}: ${error.message}`);
    return false;
  }
}

/**
 * Script entry point.
 *
 * Verifies that the Beacon v2 tree and its pre-dereferenced schemas exist,
 * creates OUTPUT_DIR, copies every entity schema, bundles every framework
 * schema (one at a time, in declaration order), prints a summary, and then
 * terminates the process: exit status 1 if any schema failed or a required
 * directory is missing, 0 otherwise.
 */
async function main() {
  console.log('Beacon v2 Schema Bundler');
  console.log('========================\n');
  console.log(`Beacon v2 root: ${BEACON_V2_ROOT}`);
  console.log(`Output directory: ${OUTPUT_DIR}\n`);

  // Guard: the Beacon v2 tree must be present.
  const hasBeaconRoot = fs.existsSync(BEACON_V2_ROOT);
  if (!hasBeaconRoot) {
    console.error(`ERROR: Beacon v2 directory not found: ${BEACON_V2_ROOT}`);
    process.exit(1);
  }

  // Guard: the pre-dereferenced entity schemas must be present.
  const hasDerefSchemas = fs.existsSync(DEREF_SCHEMAS_DIR);
  if (!hasDerefSchemas) {
    console.error(`ERROR: Pre-dereferenced schemas not found: ${DEREF_SCHEMAS_DIR}`);
    console.error('Make sure your beacon-v2 clone includes the bin/deref_schemas directory.');
    process.exit(1);
  }

  // Make sure there is somewhere to write to.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  // One boolean per processed schema, in processing order.
  const outcomes = [];

  // Entity schemas are already dereferenced upstream: plain copy.
  console.log('--- Entity Schemas (pre-dereferenced) ---\n');
  for (const name of Object.keys(ENTITY_SCHEMAS)) {
    outcomes.push(copySchema(name, ENTITY_SCHEMAS[name]));
  }

  // Framework schemas are resolved sequentially so log lines stay grouped.
  console.log('\n--- Framework Schemas (bundling) ---\n');
  for (const name of Object.keys(FRAMEWORK_SCHEMAS)) {
    outcomes.push(await bundleSchema(name, FRAMEWORK_SCHEMAS[name]));
  }

  const successful = outcomes.filter(Boolean).length;
  const failed = outcomes.length - successful;

  // Summary
  console.log('\n========================');
  console.log('Summary:');
  console.log(` Successful: ${successful}`);
  console.log(` Failed: ${failed}`);
  console.log(`\nBundled schemas written to: ${OUTPUT_DIR}`);
  console.log('\nNext step: Run the Python model generator:');
  console.log(' uv run python scripts/generate_from_bundled.py');

  process.exit(failed === 0 ? 0 : 1);
}

// Run the script. Anything main() did not handle itself is printed, and the
// exit status is forced to non-zero so calling shell scripts and CI do not
// mistake an unexpected crash for success.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Loading
Loading