From b840ca8a0fc8ad271b91759a6b3dad4f94d6380d Mon Sep 17 00:00:00 2001 From: Natalya Todorova Date: Wed, 25 Jun 2025 10:28:24 +0100 Subject: [PATCH 1/5] ODM-12373: import jobs via multipart/form-data (#398) * ODM-12373: add multipart/form-data to import job schema * ODM-12373: add multipart/form-data to import/samples * ODM-12373: add multipart/form-data to import/libraries, import/preparations * ODM-12373: tmp change paths * ODM-12373: tmp change paths2 * ODM-12373: update studyId param name and desc * ODM-12373: add multipart/form-data to signals import * ODM-12373: tmp change paths3 * !!ODM-12373: tmp fix1 * Revert "!!ODM-12373: tmp fix1" This reverts commit 7117444b106d3f0afdced0277319479c570cbda2. * !!ODM-12373: rename endpoints to /multipart * !!ODM-12373: fix operatioId * ODM-12373: fix descriptions --- openapi/v1/job.yaml | 250 ++++++++++++++++++ openapi/v1/odmApi.yaml | 235 ++++++++++++++++ .../job/ImportAFileFromMultipartRequest.yaml | 22 ++ ...pressionSignalRunFromMultipartRequest.yaml | 31 +++ .../ImportMetadataFromMultipartRequest.yaml | 15 ++ .../ImportSignalRunFomMultipartRequest.yaml | 21 ++ 6 files changed, 574 insertions(+) create mode 100644 openapi/v1/schemas/job/ImportAFileFromMultipartRequest.yaml create mode 100644 openapi/v1/schemas/job/ImportExpressionSignalRunFromMultipartRequest.yaml create mode 100644 openapi/v1/schemas/job/ImportMetadataFromMultipartRequest.yaml create mode 100644 openapi/v1/schemas/job/ImportSignalRunFomMultipartRequest.yaml diff --git a/openapi/v1/job.yaml b/openapi/v1/job.yaml index 6014bf02..cf09d79f 100644 --- a/openapi/v1/job.yaml +++ b/openapi/v1/job.yaml @@ -29,6 +29,7 @@ info: version: default-released tags: - name: Data import jobs +- name: Data import via direct file upload - name: Job operations paths: /api/v1/jobs/import/expression: @@ -75,6 +76,41 @@ paths: tags: - Data import jobs x-codegen-request-body-name: body + /api/v1/jobs/import/expression/multipart: + post: + description: |- + ## Data + This operation necessitates the URL of a tabular data file, which must be either in TSV or GCT 1.2 format. Consult the user guide for a comprehensive understanding of the file content requirements. The endpoint is capable of handling uploads of any data type, which can be detailed in the parameters section, not only Gene Expression data. + + ## Metadata + It is also possible to optionally supply the URL of a metadata file. This metadata will be used as the original metadata for the created objects. The file is expected to contain single record with metadata describing the uploaded signal file. + ## Metadata file format + * Extension: any, `.zip` and `.gz` extensions are treated as archives and get decompressed + * Format: plain text, Tab-separated format (TSV), attribute names and record values are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence (`U+000D U+000A`) + * Header: the first line is treated as table header that contains attribute names + * Records: the second line contains the values for each of the attributes described in the header line. Values represent single string or list of strings. List values are separated using the "pipe" `|` (`U+007C`) separator. Values are trimmed of whitespace before parsing, and a literal `|` (`U+007C`) character may be escaped by repeating it twice. + * Skip zeros in original data file: If this option is selected, zeros in the file will be ignored, thus conserving time and space. This option is particularly useful for handling very sparse data such as Single Cell data. + operationId: startImportExpressionMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportExpressionSignalRunFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: Import any tabular data from TSV or GCT files via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/import/flow-cytometry: post: description: |- @@ -120,6 +156,42 @@ paths: tags: - Data import jobs x-codegen-request-body-name: body + /api/v1/jobs/import/flow-cytometry/multipart: + post: + description: |- + This operation necessitates the URL of a gated flow cytometry data file, which must be in FACS format. + Consult the user guide for a comprehensive understanding of the file content requirements. + For flow cytometry data in FCS format use expression endpoint. + + When job finishes successfully the following **result** object + can be obtained using `GET /job/{id}/output` request: + + ``` + { + "groupAccession": "GSF1234567" + } + ``` + operationId: startImportFlowCytometryMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import flow-cytometry data and metadata from FACS and TSV files via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/import/libraries: post: description: |2 @@ -146,6 +218,9 @@ paths: application/json: schema: $ref: "#/components/schemas/ImportMetadataRequest" + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" required: false responses: "200": @@ -161,6 +236,38 @@ paths: tags: - Data import jobs x-codegen-request-body-name: body + /api/v1/jobs/import/libraries/multipart: + post: + description: |2 + + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: + + ``` + { + "groupAccession": "GSF1234567" + } + ``` + operationId: startImportLibrariesMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import a group of library metadata objects from a TSV file via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/import/preparations: post: description: |2 @@ -202,6 +309,38 @@ paths: tags: - Data import jobs x-codegen-request-body-name: body + /api/v1/jobs/import/preparations/multipart: + post: + description: |2 + + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: + + ``` + { + "groupAccession": "GSF1234567" + } + ``` + operationId: startImportPreparationsMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import a group of preparation metadata objects from a TSV file via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/import/samples: post: description: |2 @@ -243,6 +382,38 @@ paths: tags: - Data import jobs x-codegen-request-body-name: body + /api/v1/jobs/import/samples/multipart: + post: + description: |2 + + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: + + ``` + { + "groupAccession": "GSF1234567" + } + ``` + operationId: startImportSamplesMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import a group of sample metadata objects from a TSV file via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/import/study: post: description: |2 @@ -319,6 +490,32 @@ paths: tags: - Data import jobs x-codegen-request-body-name: body + /api/v1/jobs/import/variant/multipart: + post: + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n " + operationId: startImportVariantMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import variation data and metadata from VCF and TSV files via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/import/file: post: description: |- @@ -365,6 +562,51 @@ paths: tags: - Data import jobs x-codegen-request-body-name: body + /api/v1/jobs/import/file/multipart: + post: + description: |- + In order to import a file as an attachment to a study, please fill in the following fields: + + * `data` - a file to import as an attachment. + The file will be associated with a study and will be searchable by its name and metadata. + * `metadata` - an optional metadata file to be used as the original metadata + for the created objects. The file must contain a single record describing the uploaded attachment. + * `studyAccession` - an accession of a study the file will be associated with. + * `dataClass` - file data class with the following possible values: `Bulk + transcriptomics`, `Single-cell transcriptomics`, `Differential abundance (FC, + pval, etc.)`, `Pathway analysis`, `Proteomics`, `Single-cell proteomics`, `Metabolomics`, + `Lipidomics`, `Epigenomics`, `DNA methylation`, `Chemoinformatics`, `Imaging features`, + `Gene panel data`, `Biomarker data`, `Physical measures`, `Blood counts`, `Other body + fluid counts`, `Nanopore`, `Flow Cytometry (FCS)`, `Document`, `Other`. + + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: + + ``` + { + "accession": "GSF1234567" + } + ``` + operationId: startImportAFileMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportAFileFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import file as an attachment via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/{jobExecId}/info: get: operationId: info @@ -473,12 +715,20 @@ components: $ref: "./schemas/job/ExceptionTypeAndMessage.yaml" ImportMetadataRequest: $ref: "./schemas/job/ImportMetadataRequest.yaml" + ImportMetadataFromMultipartRequest: + $ref: "./schemas/job/ImportMetadataFromMultipartRequest.yaml" ImportSignalRunRequest: $ref: "./schemas/job/ImportSignalRunRequest.yaml" + ImportSignalRunFomMultipartRequest: + $ref: "./schemas/job/ImportSignalRunFomMultipartRequest.yaml" ImportExpressionSignalRunRequest: $ref: "./schemas/job/ImportExpressionSignalRunRequest.yaml" + ImportExpressionSignalRunFromMultipartRequest: + $ref: "./schemas/job/ImportExpressionSignalRunFromMultipartRequest.yaml" ImportAFileRequest: $ref: "./schemas/job/ImportAFileRequest.yaml" + ImportAFileFromMultipartRequest: + $ref: "./schemas/job/ImportAFileFromMultipartRequest.yaml" Info: $ref: "./schemas/job/Info.yaml" JobRuntimeError: diff --git a/openapi/v1/odmApi.yaml b/openapi/v1/odmApi.yaml index 638f6879..68fe26bd 100644 --- a/openapi/v1/odmApi.yaml +++ b/openapi/v1/odmApi.yaml @@ -43,6 +43,7 @@ tags: - name: "Study integration as User" - name: "Variant integration as User" - name: "Data import jobs" +- name: "Data import via direct file upload" - name: "Job operations" - name: "Library SPoT as Curator" - name: "Library SPoT as User" @@ -15949,6 +15950,52 @@ paths: tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/expression/multipart: + post: + description: "## Data\n This operation necessitates the URL of a tabular data\ + \ file, which must be either in TSV or GCT 1.2 format. Consult the user guide\ + \ for a comprehensive understanding of the file content requirements. The\ + \ endpoint is capable of handling uploads of any data type, which can be detailed\ + \ in the parameters section, not only Gene Expression data.\n\n## Metadata\n\ + \ It is also possible to optionally supply the URL of a metadata file. This\ + \ metadata will be used as the original metadata for the created objects.\ + \ The file is expected to contain single record with metadata describing the\ + \ uploaded signal file.\n## Metadata file format\n * Extension: any, `.zip`\ + \ and `.gz` extensions are treated as archives and get decompressed\n * Format:\ + \ plain text, Tab-separated format (TSV), attribute names and record values\ + \ are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence\ + \ (`U+000D U+000A`)\n * Header: the first line is treated as table header\ + \ that contains attribute names\n * Records: the second line contains the\ + \ values for each of the attributes described in the header line. Values\ + \ represent single string or list of strings. List values are separated using\ + \ the \"pipe\" `|` (`U+007C`) separator. Values are trimmed of whitespace\ + \ before parsing, and a literal `|` (`U+007C`) character may be escaped by\ + \ repeating it twice.\n * Skip zeros in original data file: If this option\ + \ is selected, zeros in the file will be ignored, thus conserving time and\ + \ space. This option is particularly useful for handling very sparse data\ + \ such as Single Cell data." + operationId: "startImportExpressionMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportExpressionSignalRunFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import any tabular data from TSV or GCT files via multipart/form-data\ + \ upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/flow-cytometry: post: description: "This operation necessitates the URL of a gated flow cytometry\ @@ -15987,6 +16034,36 @@ paths: tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/flow-cytometry/multipart: + post: + description: "This operation necessitates the URL of a gated flow cytometry\ + \ data file, which must be in FACS format.\nConsult the user guide for a comprehensive\ + \ understanding of the file content requirements.\nFor flow cytometry data\ + \ in FCS format use expression endpoint.\n\nWhen job finishes successfully\ + \ the following **result** object\ncan be obtained using `GET /job/{id}/output`\ + \ request:\n\n```\n{\n \"groupAccession\": \"GSF1234567\"\n}\n```" + operationId: "startImportFlowCytometryMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import flow-cytometry data and metadata from FACS and TSV files via\ + \ multipart/form-data upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/libraries: post: description: "\nWhen job finishes successfully the following **result** object\ @@ -16007,6 +16084,9 @@ paths: application/json: schema: $ref: "#/components/schemas/ImportMetadataRequest" + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" required: false responses: "200": @@ -16022,6 +16102,33 @@ paths: tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/libraries/multipart: + post: + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportLibrariesMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import a group of library metadata objects from a TSV file via multipart/form-data\ + \ upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/preparations: post: description: "\nWhen job finishes successfully the following **result** object\ @@ -16057,6 +16164,33 @@ paths: tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/preparations/multipart: + post: + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportPreparationsMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import a group of preparation metadata objects from a TSV file via\ + \ multipart/form-data upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/samples: post: description: "\nWhen job finishes successfully the following **result** object\ @@ -16092,6 +16226,33 @@ paths: tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/samples/multipart: + post: + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportSamplesMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import a group of sample metadata objects from a TSV file via multipart/form-data\ + \ upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/study: post: description: "\nWhen job finishes successfully the following **result** object\ @@ -16162,6 +16323,33 @@ paths: tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/variant/multipart: + post: + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n " + operationId: "startImportVariantMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import variation data and metadata from VCF and TSV files via multipart/form-data\ + \ upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/file: post: description: "In order to import a file as an attachment to a study, please\ @@ -16203,6 +16391,45 @@ paths: tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/file/multipart: + post: + description: "In order to import a file as an attachment to a study, please\ + \ fill in the following fields:\n\n* `data` - a file to import as an attachment.\n\ + The file will be associated with a study and will be searchable by its name\ + \ and metadata.\n* `metadata` - an optional metadata file to be used as the\ + \ original metadata\nfor the created objects. The file must contain a single\ + \ record describing the uploaded attachment.\n* `studyAccession` - an accession\ + \ of a study the file will be associated with.\n* `dataClass` - file data\ + \ class with the following possible values: `Bulk\ntranscriptomics`, `Single-cell\ + \ transcriptomics`, `Differential abundance (FC,\npval, etc.)`, `Pathway analysis`,\ + \ `Proteomics`, `Single-cell proteomics`, `Metabolomics`,\n`Lipidomics`, `Epigenomics`,\ + \ `DNA methylation`, `Chemoinformatics`, `Imaging features`,\n`Gene panel\ + \ data`, `Biomarker data`, `Physical measures`, `Blood counts`, `Other body\n\ + fluid counts`, `Nanopore`, `Flow Cytometry (FCS)`, `Document`, `Other`.\n\n\ + When job finishes successfully the following **result** object can be obtained\ + \ using `GET /job/{id}/output` request:\n\n```\n{\n \"accession\": \"GSF1234567\"\ + \n}\n```" + operationId: "startImportAFileMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportAFileFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import file as an attachment via multipart/form-data upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/{jobExecId}/info: get: operationId: "info" @@ -22144,12 +22371,20 @@ components: $ref: "./schemas/job/ExceptionTypeAndMessage.yaml" ImportMetadataRequest: $ref: "./schemas/job/ImportMetadataRequest.yaml" + ImportMetadataFromMultipartRequest: + $ref: "./schemas/job/ImportMetadataFromMultipartRequest.yaml" ImportSignalRunRequest: $ref: "./schemas/job/ImportSignalRunRequest.yaml" + ImportSignalRunFomMultipartRequest: + $ref: "./schemas/job/ImportSignalRunFomMultipartRequest.yaml" ImportExpressionSignalRunRequest: $ref: "./schemas/job/ImportExpressionSignalRunRequest.yaml" + ImportExpressionSignalRunFromMultipartRequest: + $ref: "./schemas/job/ImportExpressionSignalRunFromMultipartRequest.yaml" ImportAFileRequest: $ref: "./schemas/job/ImportAFileRequest.yaml" + ImportAFileFromMultipartRequest: + $ref: "./schemas/job/ImportAFileFromMultipartRequest.yaml" Info: $ref: "./schemas/job/Info.yaml" JobRuntimeError: diff --git a/openapi/v1/schemas/job/ImportAFileFromMultipartRequest.yaml b/openapi/v1/schemas/job/ImportAFileFromMultipartRequest.yaml new file mode 100644 index 00000000..55fc15c1 --- /dev/null +++ b/openapi/v1/schemas/job/ImportAFileFromMultipartRequest.yaml @@ -0,0 +1,22 @@ +properties: + metadata: + type: string + format: binary + description: The metadata file to be used as the original metadata for the created objects. The file must contain a single record describing the uploaded attachment. + data: + type: string + format: binary + description: A file to import as an attachment. The file will be associated with a study and will be searchable by its name and metadata. + studyAccession: + example: GSF334953 + type: string + description: An accession of a study the file will be associated with. + dataClass: + example: Proteomics + type: string + description: A mandatory parameter with the value from a limited set of values (see request description). +required: +- data +- studyAccession +- dataClass +type: object diff --git a/openapi/v1/schemas/job/ImportExpressionSignalRunFromMultipartRequest.yaml b/openapi/v1/schemas/job/ImportExpressionSignalRunFromMultipartRequest.yaml new file mode 100644 index 00000000..d5f182fc --- /dev/null +++ b/openapi/v1/schemas/job/ImportExpressionSignalRunFromMultipartRequest.yaml @@ -0,0 +1,31 @@ +allOf: +- $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" +properties: + numberOfFeatureAttributes: + description: Integer value that specifies the number of columns related to the + feature in the uploaded data frame, and indicates the starting position of the + sample data. This attribute is not needed if the uploaded file is in GCT format. + example: 5 + type: integer + dataClass: + description: "A mandatory parameter with the following possible values: `Bulk + transcriptomics`, `Single-cell transcriptomics`, `Differential abundance (FC, + pval, etc.)`, `Pathway analysis`, `Proteomics`, `Single-cell proteomics`, `Metabolomics`, + `Lipidomics`, `Epigenomics`, `DNA methylation`, `Chemoinformatics`, `Imaging features`, + `Gene panel data`, `Biomarker data`, `Physical measures`, `Blood counts`, `Other body + fluid counts`, `Nanopore`, `Flow Cytometry (FCS)`, `Document`, `Other`. \nIn case the parameter is not set + the dataClass is automatically defined as `Other`." + example: Proteomics + type: string + measurementSeparator: + description: 'This parameter is necessary when your file contains multiple measurement + columns for each sample, library, or preparation. It represents the character + that distinguishes the sample/library/preparation name from the measurement + name in column headers. Supported separators include `. , : ; _ - / \ |`, with + the allowance for multi-character separators.' + example: ':' + type: string +components: + schemas: + ImportSignalRunFomMultipartRequest: + $ref: "./ImportSignalRunFomMultipartRequest.yaml" diff --git a/openapi/v1/schemas/job/ImportMetadataFromMultipartRequest.yaml b/openapi/v1/schemas/job/ImportMetadataFromMultipartRequest.yaml new file mode 100644 index 00000000..47b4d9f3 --- /dev/null +++ b/openapi/v1/schemas/job/ImportMetadataFromMultipartRequest.yaml @@ -0,0 +1,15 @@ +properties: + studyId: + type: string + example: GSF1234567 + description: The ID (accession) of the study for organising files in the internal storage; linking to the target entity must be done through a separate endpoint. + templateId: + example: GSF334953 + type: string + metadata: + type: string + format: binary +required: + - studyId + - metadata +type: object diff --git a/openapi/v1/schemas/job/ImportSignalRunFomMultipartRequest.yaml b/openapi/v1/schemas/job/ImportSignalRunFomMultipartRequest.yaml new file mode 100644 index 00000000..ca06cd25 --- /dev/null +++ b/openapi/v1/schemas/job/ImportSignalRunFomMultipartRequest.yaml @@ -0,0 +1,21 @@ +properties: + studyId: + type: string + example: GSF1234567 + description: The ID (accession) of the study for organising files in the internal storage; linking to the target entity must be done through a separate endpoint. + metadata: + type: string + format: binary + data: + type: string + format: binary + templateId: + example: GSF334953 + type: string + previousVersion: + example: GSF334953 + type: string +required: + - studyId + - data +type: object From 22fe2275173e2a3541022832a76c5aa6b1ab03a0 Mon Sep 17 00:00:00 2001 From: Mikhail Smazhevsky Date: Wed, 9 Jul 2025 10:37:50 +0200 Subject: [PATCH 2/5] ODM-11748: import cells (#408) * !!ODM-11748: import cells * !!ODM-11748: changing endpoints order: Study, Sample, Library, Preparation, Cells, Expression, Variants, Flow Cytometry, Files --- openapi/v1/job.yaml | 327 +++++++++++------- openapi/v1/odmApi.yaml | 312 ++++++++++------- .../job/ImportCellsFromMultipartRequest.yaml | 12 + .../v1/schemas/job/ImportCellsRequest.yaml | 9 + 4 files changed, 415 insertions(+), 245 deletions(-) create mode 100644 openapi/v1/schemas/job/ImportCellsFromMultipartRequest.yaml create mode 100644 openapi/v1/schemas/job/ImportCellsRequest.yaml diff --git a/openapi/v1/job.yaml b/openapi/v1/job.yaml index cf09d79f..f92f5296 100644 --- a/openapi/v1/job.yaml +++ b/openapi/v1/job.yaml @@ -32,70 +32,32 @@ tags: - name: Data import via direct file upload - name: Job operations paths: - /api/v1/jobs/import/expression: + /api/v1/jobs/import/study: post: - description: |- - ## Data - This operation necessitates the URL of a tabular data file, which must be either in TSV or GCT 1.2 format. Consult the user guide for a comprehensive understanding of the file content requirements. The endpoint is capable of handling uploads of any data type, which can be detailed in the parameters section, not only Gene Expression data. + description: |2 - ## Metadata - It is also possible to optionally supply the URL of a metadata file. This metadata will be used as the original metadata for the created objects. The file is expected to contain single record with metadata describing the uploaded signal file. - ## Metadata file format - * Extension: any, `.zip` and `.gz` extensions are treated as archives and get decompressed - * Format: plain text, Tab-separated format (TSV), attribute names and record values are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence (`U+000D U+000A`) - * Header: the first line is treated as table header that contains attribute names - * Records: the second line contains the values for each of the attributes described in the header line. Values represent single string or list of strings. List values are separated using the "pipe" `|` (`U+007C`) separator. Values are trimmed of whitespace before parsing, and a literal `|` (`U+007C`) character may be escaped by repeating it twice. - * Skip zeros in original data file: If this option is selected, zeros in the file will be ignored, thus conserving time and space. This option is particularly useful for handling very sparse data such as Single Cell data. - operationId: startImportExpression + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: + + ``` + { + "groupAccession": "GSF1234567" + } + ``` + operationId: startImportStudy parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." - in: query - name: allow_dups - schema: - default: false - type: boolean + in: query + name: allow_dups + schema: + default: false + type: boolean requestBody: content: application/json: schema: - $ref: "#/components/schemas/ImportExpressionSignalRunRequest" - required: false - responses: - "200": - content: - application/json: - schema: - $ref: "#/components/schemas/Info" - description: successful operation - security: - - Access-token: [] - - Genestack-API-Token: [] - summary: Import any tabular data from TSV or GCT files - tags: - - Data import jobs - x-codegen-request-body-name: body - /api/v1/jobs/import/expression/multipart: - post: - description: |- - ## Data - This operation necessitates the URL of a tabular data file, which must be either in TSV or GCT 1.2 format. Consult the user guide for a comprehensive understanding of the file content requirements. The endpoint is capable of handling uploads of any data type, which can be detailed in the parameters section, not only Gene Expression data. - - ## Metadata - It is also possible to optionally supply the URL of a metadata file. This metadata will be used as the original metadata for the created objects. The file is expected to contain single record with metadata describing the uploaded signal file. - ## Metadata file format - * Extension: any, `.zip` and `.gz` extensions are treated as archives and get decompressed - * Format: plain text, Tab-separated format (TSV), attribute names and record values are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence (`U+000D U+000A`) - * Header: the first line is treated as table header that contains attribute names - * Records: the second line contains the values for each of the attributes described in the header line. Values represent single string or list of strings. List values are separated using the "pipe" `|` (`U+007C`) separator. Values are trimmed of whitespace before parsing, and a literal `|` (`U+007C`) character may be escaped by repeating it twice. - * Skip zeros in original data file: If this option is selected, zeros in the file will be ignored, thus conserving time and space. This option is particularly useful for handling very sparse data such as Single Cell data. - operationId: startImportExpressionMultipart - requestBody: - content: - multipart/form-data: - schema: - $ref: "#/components/schemas/ImportExpressionSignalRunFromMultipartRequest" + $ref: "#/components/schemas/ImportMetadataRequest" required: false responses: "200": @@ -105,42 +67,38 @@ paths: $ref: "#/components/schemas/Info" description: successful operation security: - - Access-token: [] - - Genestack-API-Token: [] - summary: Import any tabular data from TSV or GCT files via multipart/form-data upload + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import study metadata from a TSV file tags: - - Data import via direct file upload + - Data import jobs x-codegen-request-body-name: body - /api/v1/jobs/import/flow-cytometry: + /api/v1/jobs/import/samples: post: - description: |- - This operation necessitates the URL of a gated flow cytometry data file, which must be in FACS format. - Consult the user guide for a comprehensive understanding of the file content requirements. - For flow cytometry data in FCS format use expression endpoint. + description: |2 - When job finishes successfully the following **result** object - can be obtained using `GET /job/{id}/output` request: + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: ``` { "groupAccession": "GSF1234567" } ``` - operationId: startImportFlowCytometry + operationId: startImportSamples parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." - in: query - name: allow_dups - schema: - default: false - type: boolean + in: query + name: allow_dups + schema: + default: false + type: boolean requestBody: content: application/json: schema: - $ref: "#/components/schemas/ImportSignalRunRequest" + $ref: "#/components/schemas/ImportMetadataRequest" required: false responses: "200": @@ -150,33 +108,29 @@ paths: $ref: "#/components/schemas/Info" description: successful operation security: - - Access-token: [] - - Genestack-API-Token: [] - summary: Import flow-cytometry data and metadata from FACS and TSV files + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import a group of sample metadata objects from a TSV file tags: - - Data import jobs + - Data import jobs x-codegen-request-body-name: body - /api/v1/jobs/import/flow-cytometry/multipart: + /api/v1/jobs/import/samples/multipart: post: - description: |- - This operation necessitates the URL of a gated flow cytometry data file, which must be in FACS format. - Consult the user guide for a comprehensive understanding of the file content requirements. - For flow cytometry data in FCS format use expression endpoint. + description: |2 - When job finishes successfully the following **result** object - can be obtained using `GET /job/{id}/output` request: + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: ``` { "groupAccession": "GSF1234567" } ``` - operationId: startImportFlowCytometryMultipart + operationId: startImportSamplesMultipart requestBody: content: multipart/form-data: schema: - $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" required: false responses: "200": @@ -188,7 +142,7 @@ paths: security: - Access-token: [ ] - Genestack-API-Token: [ ] - summary: Import flow-cytometry data and metadata from FACS and TSV files via multipart/form-data upload + summary: Import a group of sample metadata objects from a TSV file via multipart/form-data upload tags: - Data import via direct file upload x-codegen-request-body-name: body @@ -205,7 +159,7 @@ paths: ``` operationId: startImportLibraries parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: query @@ -281,7 +235,7 @@ paths: ``` operationId: startImportPreparations parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: query @@ -341,10 +295,12 @@ paths: tags: - Data import via direct file upload x-codegen-request-body-name: body - /api/v1/jobs/import/samples: + /api/v1/jobs/import/cells: post: description: |2 + The endpoint initiates a job to import cell data and creates a Cell Group to manage it. + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: ``` @@ -352,9 +308,9 @@ paths: "groupAccession": "GSF1234567" } ``` - operationId: startImportSamples + operationId: startImportCells parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: query @@ -366,7 +322,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ImportMetadataRequest" + $ref: "#/components/schemas/ImportCellsRequest" required: false responses: "200": @@ -378,14 +334,16 @@ paths: security: - Access-token: [] - Genestack-API-Token: [] - summary: Import a group of sample metadata objects from a TSV file + summary: Import a group of cell data objects from a TSV file tags: - Data import jobs x-codegen-request-body-name: body - /api/v1/jobs/import/samples/multipart: + /api/v1/jobs/import/cells/multipart: post: description: |2 + The endpoint initiates a job to import cell data and creates a Cell Group to manage it. + When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: ``` @@ -393,12 +351,12 @@ paths: "groupAccession": "GSF1234567" } ``` - operationId: startImportSamplesMultipart + operationId: startImportCellsMultipart requestBody: content: - multipart/form-data: + multipart/form-data: schema: - $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + $ref: "#/components/schemas/ImportCellsFromMultipartRequest" required: false responses: "200": @@ -410,36 +368,39 @@ paths: security: - Access-token: [ ] - Genestack-API-Token: [ ] - summary: Import a group of sample metadata objects from a TSV file via multipart/form-data upload + summary: Import a group of cell data objects from a TSV file via multipart/form-data upload tags: - Data import via direct file upload x-codegen-request-body-name: body - /api/v1/jobs/import/study: + /api/v1/jobs/import/expression: post: - description: |2 - - When job finishes successfully the following **result** object can be obtained using `GET /job/{id}/output` request: + description: |- + ## Data + This operation necessitates the URL of a tabular data file, which must be either in TSV or GCT 1.2 format. Consult the user guide for a comprehensive understanding of the file content requirements. The endpoint is capable of handling uploads of any data type, which can be detailed in the parameters section, not only Gene Expression data. - ``` - { - "groupAccession": "GSF1234567" - } - ``` - operationId: startImportStudy + ## Metadata + It is also possible to optionally supply the URL of a metadata file. This metadata will be used as the original metadata for the created objects. The file is expected to contain single record with metadata describing the uploaded signal file. + ## Metadata file format + * Extension: any, `.zip` and `.gz` extensions are treated as archives and get decompressed + * Format: plain text, Tab-separated format (TSV), attribute names and record values are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence (`U+000D U+000A`) + * Header: the first line is treated as table header that contains attribute names + * Records: the second line contains the values for each of the attributes described in the header line. Values represent single string or list of strings. List values are separated using the "pipe" `|` (`U+007C`) separator. Values are trimmed of whitespace before parsing, and a literal `|` (`U+007C`) character may be escaped by repeating it twice. + * Skip zeros in original data file: If this option is selected, zeros in the file will be ignored, thus conserving time and space. This option is particularly useful for handling very sparse data such as Single Cell data. + operationId: startImportExpression parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." - in: query - name: allow_dups - schema: - default: false - type: boolean + in: query + name: allow_dups + schema: + default: false + type: boolean requestBody: content: application/json: schema: - $ref: "#/components/schemas/ImportMetadataRequest" + $ref: "#/components/schemas/ImportExpressionSignalRunRequest" required: false responses: "200": @@ -449,11 +410,46 @@ paths: $ref: "#/components/schemas/Info" description: successful operation security: - - Access-token: [] - - Genestack-API-Token: [] - summary: Import study metadata from a TSV file + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import any tabular data from TSV or GCT files tags: - - Data import jobs + - Data import jobs + x-codegen-request-body-name: body + /api/v1/jobs/import/expression/multipart: + post: + description: |- + ## Data + This operation necessitates the URL of a tabular data file, which must be either in TSV or GCT 1.2 format. Consult the user guide for a comprehensive understanding of the file content requirements. The endpoint is capable of handling uploads of any data type, which can be detailed in the parameters section, not only Gene Expression data. + + ## Metadata + It is also possible to optionally supply the URL of a metadata file. This metadata will be used as the original metadata for the created objects. The file is expected to contain single record with metadata describing the uploaded signal file. + ## Metadata file format + * Extension: any, `.zip` and `.gz` extensions are treated as archives and get decompressed + * Format: plain text, Tab-separated format (TSV), attribute names and record values are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence (`U+000D U+000A`) + * Header: the first line is treated as table header that contains attribute names + * Records: the second line contains the values for each of the attributes described in the header line. Values represent single string or list of strings. List values are separated using the "pipe" `|` (`U+007C`) separator. Values are trimmed of whitespace before parsing, and a literal `|` (`U+007C`) character may be escaped by repeating it twice. + * Skip zeros in original data file: If this option is selected, zeros in the file will be ignored, thus conserving time and space. This option is particularly useful for handling very sparse data such as Single Cell data. + operationId: startImportExpressionMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportExpressionSignalRunFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import any tabular data from TSV or GCT files via multipart/form-data upload + tags: + - Data import via direct file upload x-codegen-request-body-name: body /api/v1/jobs/import/variant: post: @@ -462,7 +458,7 @@ paths: groupAccession\": \"GSF1234567\"\n}\n```\n " operationId: startImportVariant parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: query @@ -516,6 +512,87 @@ paths: tags: - Data import via direct file upload x-codegen-request-body-name: body + /api/v1/jobs/import/flow-cytometry: + post: + description: |- + This operation necessitates the URL of a gated flow cytometry data file, which must be in FACS format. + Consult the user guide for a comprehensive understanding of the file content requirements. + For flow cytometry data in FCS format use expression endpoint. + + When job finishes successfully the following **result** object + can be obtained using `GET /job/{id}/output` request: + + ``` + { + "groupAccession": "GSF1234567" + } + ``` + operationId: startImportFlowCytometry + parameters: + - description: "Load duplicate data: the data from the link(s) has already been\ + \ previously loaded into ODM, and for **testing purposes**, you need to\ + \ load this data again." + in: query + name: allow_dups + schema: + default: false + type: boolean + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/ImportSignalRunRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import flow-cytometry data and metadata from FACS and TSV files + tags: + - Data import jobs + x-codegen-request-body-name: body + /api/v1/jobs/import/flow-cytometry/multipart: + post: + description: |- + This operation necessitates the URL of a gated flow cytometry data file, which must be in FACS format. + Consult the user guide for a comprehensive understanding of the file content requirements. + For flow cytometry data in FCS format use expression endpoint. + + When job finishes successfully the following **result** object + can be obtained using `GET /job/{id}/output` request: + + ``` + { + "groupAccession": "GSF1234567" + } + ``` + operationId: startImportFlowCytometryMultipart + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: successful operation + security: + - Access-token: [ ] + - Genestack-API-Token: [ ] + summary: Import flow-cytometry data and metadata from FACS and TSV files via multipart/form-data upload + tags: + - Data import via direct file upload + x-codegen-request-body-name: body /api/v1/jobs/import/file: post: description: |- @@ -702,7 +779,7 @@ paths: components: parameters: AllowDuplicates: - description: "Load duplicate data: the data from the links has already been\ + description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to load\ \ this data again." in: query @@ -713,6 +790,10 @@ components: schemas: ExceptionTypeAndMessage: $ref: "./schemas/job/ExceptionTypeAndMessage.yaml" + ImportCellsRequest: + $ref: "./schemas/job/ImportCellsRequest.yaml" + ImportCellsFromMultipartRequest: + $ref: "./schemas/job/ImportCellsFromMultipartRequest.yaml" ImportMetadataRequest: $ref: "./schemas/job/ImportMetadataRequest.yaml" ImportMetadataFromMultipartRequest: diff --git a/openapi/v1/odmApi.yaml b/openapi/v1/odmApi.yaml index 68fe26bd..0533b938 100644 --- a/openapi/v1/odmApi.yaml +++ b/openapi/v1/odmApi.yaml @@ -15896,33 +15896,14 @@ paths: summary: "Retrieve file's metadata by study ID" tags: - "Files integration as User" - /api/v1/jobs/import/expression: + /api/v1/jobs/import/study: post: - description: "## Data\n This operation necessitates the URL of a tabular data\ - \ file, which must be either in TSV or GCT 1.2 format. Consult the user guide\ - \ for a comprehensive understanding of the file content requirements. The\ - \ endpoint is capable of handling uploads of any data type, which can be detailed\ - \ in the parameters section, not only Gene Expression data.\n\n## Metadata\n\ - \ It is also possible to optionally supply the URL of a metadata file. This\ - \ metadata will be used as the original metadata for the created objects.\ - \ The file is expected to contain single record with metadata describing the\ - \ uploaded signal file.\n## Metadata file format\n * Extension: any, `.zip`\ - \ and `.gz` extensions are treated as archives and get decompressed\n * Format:\ - \ plain text, Tab-separated format (TSV), attribute names and record values\ - \ are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence\ - \ (`U+000D U+000A`)\n * Header: the first line is treated as table header\ - \ that contains attribute names\n * Records: the second line contains the\ - \ values for each of the attributes described in the header line. Values\ - \ represent single string or list of strings. List values are separated using\ - \ the \"pipe\" `|` (`U+007C`) separator. Values are trimmed of whitespace\ - \ before parsing, and a literal `|` (`U+007C`) character may be escaped by\ - \ repeating it twice.\n * Skip zeros in original data file: If this option\ - \ is selected, zeros in the file will be ignored, thus conserving time and\ - \ space. This option is particularly useful for handling very sparse data\ - \ such as Single Cell data." - operationId: "startImportExpression" + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportStudy" parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: "query" @@ -15934,7 +15915,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ImportExpressionSignalRunRequest" + $ref: "#/components/schemas/ImportMetadataRequest" required: false responses: "200": @@ -15946,67 +15927,18 @@ paths: security: - Access-token: [] - Genestack-API-Token: [] - summary: "Import any tabular data from TSV or GCT files" + summary: "Import study metadata from a TSV file" tags: - "Data import jobs" x-codegen-request-body-name: "body" - /api/v1/jobs/import/expression/multipart: - post: - description: "## Data\n This operation necessitates the URL of a tabular data\ - \ file, which must be either in TSV or GCT 1.2 format. Consult the user guide\ - \ for a comprehensive understanding of the file content requirements. The\ - \ endpoint is capable of handling uploads of any data type, which can be detailed\ - \ in the parameters section, not only Gene Expression data.\n\n## Metadata\n\ - \ It is also possible to optionally supply the URL of a metadata file. This\ - \ metadata will be used as the original metadata for the created objects.\ - \ The file is expected to contain single record with metadata describing the\ - \ uploaded signal file.\n## Metadata file format\n * Extension: any, `.zip`\ - \ and `.gz` extensions are treated as archives and get decompressed\n * Format:\ - \ plain text, Tab-separated format (TSV), attribute names and record values\ - \ are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence\ - \ (`U+000D U+000A`)\n * Header: the first line is treated as table header\ - \ that contains attribute names\n * Records: the second line contains the\ - \ values for each of the attributes described in the header line. Values\ - \ represent single string or list of strings. List values are separated using\ - \ the \"pipe\" `|` (`U+007C`) separator. Values are trimmed of whitespace\ - \ before parsing, and a literal `|` (`U+007C`) character may be escaped by\ - \ repeating it twice.\n * Skip zeros in original data file: If this option\ - \ is selected, zeros in the file will be ignored, thus conserving time and\ - \ space. This option is particularly useful for handling very sparse data\ - \ such as Single Cell data." - operationId: "startImportExpressionMultipart" - requestBody: - content: - multipart/form-data: - schema: - $ref: "#/components/schemas/ImportExpressionSignalRunFromMultipartRequest" - required: false - responses: - "200": - content: - application/json: - schema: - $ref: "#/components/schemas/Info" - description: "successful operation" - security: - - Access-token: [] - - Genestack-API-Token: [] - summary: "Import any tabular data from TSV or GCT files via multipart/form-data\ - \ upload" - tags: - - "Data import via direct file upload" - x-codegen-request-body-name: "body" - /api/v1/jobs/import/flow-cytometry: + /api/v1/jobs/import/samples: post: - description: "This operation necessitates the URL of a gated flow cytometry\ - \ data file, which must be in FACS format.\nConsult the user guide for a comprehensive\ - \ understanding of the file content requirements.\nFor flow cytometry data\ - \ in FCS format use expression endpoint.\n\nWhen job finishes successfully\ - \ the following **result** object\ncan be obtained using `GET /job/{id}/output`\ - \ request:\n\n```\n{\n \"groupAccession\": \"GSF1234567\"\n}\n```" - operationId: "startImportFlowCytometry" + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportSamples" parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: "query" @@ -16018,7 +15950,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ImportSignalRunRequest" + $ref: "#/components/schemas/ImportMetadataRequest" required: false responses: "200": @@ -16030,24 +15962,21 @@ paths: security: - Access-token: [] - Genestack-API-Token: [] - summary: "Import flow-cytometry data and metadata from FACS and TSV files" + summary: "Import a group of sample metadata objects from a TSV file" tags: - "Data import jobs" x-codegen-request-body-name: "body" - /api/v1/jobs/import/flow-cytometry/multipart: + /api/v1/jobs/import/samples/multipart: post: - description: "This operation necessitates the URL of a gated flow cytometry\ - \ data file, which must be in FACS format.\nConsult the user guide for a comprehensive\ - \ understanding of the file content requirements.\nFor flow cytometry data\ - \ in FCS format use expression endpoint.\n\nWhen job finishes successfully\ - \ the following **result** object\ncan be obtained using `GET /job/{id}/output`\ - \ request:\n\n```\n{\n \"groupAccession\": \"GSF1234567\"\n}\n```" - operationId: "startImportFlowCytometryMultipart" + description: "\nWhen job finishes successfully the following **result** object\ + \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ + groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportSamplesMultipart" requestBody: content: multipart/form-data: schema: - $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" required: false responses: "200": @@ -16059,8 +15988,8 @@ paths: security: - Access-token: [] - Genestack-API-Token: [] - summary: "Import flow-cytometry data and metadata from FACS and TSV files via\ - \ multipart/form-data upload" + summary: "Import a group of sample metadata objects from a TSV file via multipart/form-data\ + \ upload" tags: - "Data import via direct file upload" x-codegen-request-body-name: "body" @@ -16071,7 +16000,7 @@ paths: groupAccession\": \"GSF1234567\"\n}\n```\n" operationId: "startImportLibraries" parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: "query" @@ -16136,7 +16065,7 @@ paths: groupAccession\": \"GSF1234567\"\n}\n```\n" operationId: "startImportPreparations" parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: "query" @@ -16191,14 +16120,15 @@ paths: tags: - "Data import via direct file upload" x-codegen-request-body-name: "body" - /api/v1/jobs/import/samples: + /api/v1/jobs/import/cells: post: - description: "\nWhen job finishes successfully the following **result** object\ - \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ - groupAccession\": \"GSF1234567\"\n}\n```\n" - operationId: "startImportSamples" + description: "\nThe endpoint initiates a job to import cell data and creates\ + \ a Cell Group to manage it.\n\nWhen job finishes successfully the following\ + \ **result** object can be obtained using `GET /job/{id}/output` request:\n\ + \n```\n{\n \"groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportCells" parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: "query" @@ -16210,7 +16140,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ImportMetadataRequest" + $ref: "#/components/schemas/ImportCellsRequest" required: false responses: "200": @@ -16222,21 +16152,22 @@ paths: security: - Access-token: [] - Genestack-API-Token: [] - summary: "Import a group of sample metadata objects from a TSV file" + summary: "Import a group of cell data objects from a TSV file" tags: - "Data import jobs" x-codegen-request-body-name: "body" - /api/v1/jobs/import/samples/multipart: + /api/v1/jobs/import/cells/multipart: post: - description: "\nWhen job finishes successfully the following **result** object\ - \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ - groupAccession\": \"GSF1234567\"\n}\n```\n" - operationId: "startImportSamplesMultipart" + description: "\nThe endpoint initiates a job to import cell data and creates\ + \ a Cell Group to manage it.\n\nWhen job finishes successfully the following\ + \ **result** object can be obtained using `GET /job/{id}/output` request:\n\ + \n```\n{\n \"groupAccession\": \"GSF1234567\"\n}\n```\n" + operationId: "startImportCellsMultipart" requestBody: content: multipart/form-data: schema: - $ref: "#/components/schemas/ImportMetadataFromMultipartRequest" + $ref: "#/components/schemas/ImportCellsFromMultipartRequest" required: false responses: "200": @@ -16248,19 +16179,38 @@ paths: security: - Access-token: [] - Genestack-API-Token: [] - summary: "Import a group of sample metadata objects from a TSV file via multipart/form-data\ + summary: "Import a group of cell data objects from a TSV file via multipart/form-data\ \ upload" tags: - "Data import via direct file upload" x-codegen-request-body-name: "body" - /api/v1/jobs/import/study: + /api/v1/jobs/import/expression: post: - description: "\nWhen job finishes successfully the following **result** object\ - \ can be obtained using `GET /job/{id}/output` request:\n\n```\n{\n \"\ - groupAccession\": \"GSF1234567\"\n}\n```\n" - operationId: "startImportStudy" + description: "## Data\n This operation necessitates the URL of a tabular data\ + \ file, which must be either in TSV or GCT 1.2 format. Consult the user guide\ + \ for a comprehensive understanding of the file content requirements. The\ + \ endpoint is capable of handling uploads of any data type, which can be detailed\ + \ in the parameters section, not only Gene Expression data.\n\n## Metadata\n\ + \ It is also possible to optionally supply the URL of a metadata file. This\ + \ metadata will be used as the original metadata for the created objects.\ + \ The file is expected to contain single record with metadata describing the\ + \ uploaded signal file.\n## Metadata file format\n * Extension: any, `.zip`\ + \ and `.gz` extensions are treated as archives and get decompressed\n * Format:\ + \ plain text, Tab-separated format (TSV), attribute names and record values\ + \ are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence\ + \ (`U+000D U+000A`)\n * Header: the first line is treated as table header\ + \ that contains attribute names\n * Records: the second line contains the\ + \ values for each of the attributes described in the header line. Values\ + \ represent single string or list of strings. List values are separated using\ + \ the \"pipe\" `|` (`U+007C`) separator. Values are trimmed of whitespace\ + \ before parsing, and a literal `|` (`U+007C`) character may be escaped by\ + \ repeating it twice.\n * Skip zeros in original data file: If this option\ + \ is selected, zeros in the file will be ignored, thus conserving time and\ + \ space. This option is particularly useful for handling very sparse data\ + \ such as Single Cell data." + operationId: "startImportExpression" parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: "query" @@ -16272,7 +16222,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ImportMetadataRequest" + $ref: "#/components/schemas/ImportExpressionSignalRunRequest" required: false responses: "200": @@ -16284,10 +16234,56 @@ paths: security: - Access-token: [] - Genestack-API-Token: [] - summary: "Import study metadata from a TSV file" + summary: "Import any tabular data from TSV or GCT files" tags: - "Data import jobs" x-codegen-request-body-name: "body" + /api/v1/jobs/import/expression/multipart: + post: + description: "## Data\n This operation necessitates the URL of a tabular data\ + \ file, which must be either in TSV or GCT 1.2 format. Consult the user guide\ + \ for a comprehensive understanding of the file content requirements. The\ + \ endpoint is capable of handling uploads of any data type, which can be detailed\ + \ in the parameters section, not only Gene Expression data.\n\n## Metadata\n\ + \ It is also possible to optionally supply the URL of a metadata file. This\ + \ metadata will be used as the original metadata for the created objects.\ + \ The file is expected to contain single record with metadata describing the\ + \ uploaded signal file.\n## Metadata file format\n * Extension: any, `.zip`\ + \ and `.gz` extensions are treated as archives and get decompressed\n * Format:\ + \ plain text, Tab-separated format (TSV), attribute names and record values\ + \ are separated with tabs (`U+0009`), lines are separated with `CRLF` sequence\ + \ (`U+000D U+000A`)\n * Header: the first line is treated as table header\ + \ that contains attribute names\n * Records: the second line contains the\ + \ values for each of the attributes described in the header line. Values\ + \ represent single string or list of strings. List values are separated using\ + \ the \"pipe\" `|` (`U+007C`) separator. Values are trimmed of whitespace\ + \ before parsing, and a literal `|` (`U+007C`) character may be escaped by\ + \ repeating it twice.\n * Skip zeros in original data file: If this option\ + \ is selected, zeros in the file will be ignored, thus conserving time and\ + \ space. This option is particularly useful for handling very sparse data\ + \ such as Single Cell data." + operationId: "startImportExpressionMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportExpressionSignalRunFromMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import any tabular data from TSV or GCT files via multipart/form-data\ + \ upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/variant: post: description: "\nWhen job finishes successfully the following **result** object\ @@ -16295,7 +16291,7 @@ paths: groupAccession\": \"GSF1234567\"\n}\n```\n " operationId: "startImportVariant" parameters: - - description: "Load duplicate data: the data from the links has already been\ + - description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to\ \ load this data again." in: "query" @@ -16350,6 +16346,74 @@ paths: tags: - "Data import via direct file upload" x-codegen-request-body-name: "body" + /api/v1/jobs/import/flow-cytometry: + post: + description: "This operation necessitates the URL of a gated flow cytometry\ + \ data file, which must be in FACS format.\nConsult the user guide for a comprehensive\ + \ understanding of the file content requirements.\nFor flow cytometry data\ + \ in FCS format use expression endpoint.\n\nWhen job finishes successfully\ + \ the following **result** object\ncan be obtained using `GET /job/{id}/output`\ + \ request:\n\n```\n{\n \"groupAccession\": \"GSF1234567\"\n}\n```" + operationId: "startImportFlowCytometry" + parameters: + - description: "Load duplicate data: the data from the link(s) has already been\ + \ previously loaded into ODM, and for **testing purposes**, you need to\ + \ load this data again." + in: "query" + name: "allow_dups" + schema: + default: false + type: "boolean" + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/ImportSignalRunRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import flow-cytometry data and metadata from FACS and TSV files" + tags: + - "Data import jobs" + x-codegen-request-body-name: "body" + /api/v1/jobs/import/flow-cytometry/multipart: + post: + description: "This operation necessitates the URL of a gated flow cytometry\ + \ data file, which must be in FACS format.\nConsult the user guide for a comprehensive\ + \ understanding of the file content requirements.\nFor flow cytometry data\ + \ in FCS format use expression endpoint.\n\nWhen job finishes successfully\ + \ the following **result** object\ncan be obtained using `GET /job/{id}/output`\ + \ request:\n\n```\n{\n \"groupAccession\": \"GSF1234567\"\n}\n```" + operationId: "startImportFlowCytometryMultipart" + requestBody: + content: + multipart/form-data: + schema: + $ref: "#/components/schemas/ImportSignalRunFomMultipartRequest" + required: false + responses: + "200": + content: + application/json: + schema: + $ref: "#/components/schemas/Info" + description: "successful operation" + security: + - Access-token: [] + - Genestack-API-Token: [] + summary: "Import flow-cytometry data and metadata from FACS and TSV files via\ + \ multipart/form-data upload" + tags: + - "Data import via direct file upload" + x-codegen-request-body-name: "body" /api/v1/jobs/import/file: post: description: "In order to import a file as an attachment to a study, please\ @@ -22369,6 +22433,10 @@ components: $ref: "./schemas/integration/FilterOption.yaml" ExceptionTypeAndMessage: $ref: "./schemas/job/ExceptionTypeAndMessage.yaml" + ImportCellsRequest: + $ref: "./schemas/job/ImportCellsRequest.yaml" + ImportCellsFromMultipartRequest: + $ref: "./schemas/job/ImportCellsFromMultipartRequest.yaml" ImportMetadataRequest: $ref: "./schemas/job/ImportMetadataRequest.yaml" ImportMetadataFromMultipartRequest: @@ -22462,7 +22530,7 @@ components: type: "apiKey" parameters: AllowDuplicates: - description: "Load duplicate data: the data from the links has already been\ + description: "Load duplicate data: the data from the link(s) has already been\ \ previously loaded into ODM, and for **testing purposes**, you need to load\ \ this data again." in: "query" diff --git a/openapi/v1/schemas/job/ImportCellsFromMultipartRequest.yaml b/openapi/v1/schemas/job/ImportCellsFromMultipartRequest.yaml new file mode 100644 index 00000000..4f109f4e --- /dev/null +++ b/openapi/v1/schemas/job/ImportCellsFromMultipartRequest.yaml @@ -0,0 +1,12 @@ +properties: + studyId: + type: string + example: GSF1234567 + description: The ID (accession) of the study for organising files in the internal storage; linking to the target entity must be done through a separate endpoint. + data: + type: string + format: binary +required: + - studyId + - data +type: object diff --git a/openapi/v1/schemas/job/ImportCellsRequest.yaml b/openapi/v1/schemas/job/ImportCellsRequest.yaml new file mode 100644 index 00000000..c854b4a6 --- /dev/null +++ b/openapi/v1/schemas/job/ImportCellsRequest.yaml @@ -0,0 +1,9 @@ +example: + dataLink: https://mybucket.s3.amazonaws.com/my-experiment/cells.tsv +properties: + dataLink: + example: https://mybucket.s3.amazonaws.com/my-experiment/cells.tsv + type: string +required: +- dataLink +type: object From c6a26ee31dc143cf3da23ec523d73cb86784c347 Mon Sep 17 00:00:00 2001 From: Mikhail Smazhevsky Date: Tue, 22 Jul 2025 10:51:23 +0200 Subject: [PATCH 3/5] ODM-11756: get cells endpoints (#412) * !!ODM-11756: get cells endpoints * !!ODM-11756: no additionalProperties --- .../fs/usr/share/nginx/html/helper/index.html | 8 +- openapi/v1/cellCurator.yaml | 120 ++++++++++ openapi/v1/cellUser.yaml | 120 ++++++++++ openapi/v1/odmApi.yaml | 206 ++++++++++++++++++ openapi/v1/schemas/cell/Cell.yaml | 31 +++ openapi/v1/schemas/cell/CellListResponse.yaml | 33 +++ 6 files changed, 517 insertions(+), 1 deletion(-) create mode 100644 openapi/v1/cellCurator.yaml create mode 100644 openapi/v1/cellUser.yaml create mode 100644 openapi/v1/schemas/cell/Cell.yaml create mode 100644 openapi/v1/schemas/cell/CellListResponse.yaml diff --git a/openapi/swagger/fs/usr/share/nginx/html/helper/index.html b/openapi/swagger/fs/usr/share/nginx/html/helper/index.html index f1c18e07..11f7017d 100644 --- a/openapi/swagger/fs/usr/share/nginx/html/helper/index.html +++ b/openapi/swagger/fs/usr/share/nginx/html/helper/index.html @@ -1,5 +1,5 @@