From 4e807a4caee00eaaf186760524b055f2a1c0af36 Mon Sep 17 00:00:00 2001 From: Jesse Bickel Date: Tue, 27 Jun 2023 15:05:47 -0500 Subject: [PATCH] Use `csv-parse`'s sync API throughout the scripts Without this change there would be two CSV reading libraries in use and two scripts would continue to have race conditions. We do not need the sophistication of a stream API because we have yet to see any CSVs larger than a few MiB. This change uses the technique already present in the `postProposalVersions.ts` script in the older two scripts. Because there would be repeated boilerplate otherwise, a new `csv.ts` has the commonly used type and type assertion guard. Issue #25 Race between row and EOF events for forms and proposals --- package-lock.json | 14 ------- package.json | 1 - src/csv.ts | 13 ++++++ src/generateApplicationFormJson.ts | 66 +++++++++++++++++------------- src/generateBaseFieldsInserts.ts | 49 ++++++++++++---------- src/postProposalVersions.ts | 15 +------ 6 files changed, 79 insertions(+), 79 deletions(-) create mode 100644 src/csv.ts diff --git a/package-lock.json b/package-lock.json index a6c7929..1c7e489 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,6 @@ "dependencies": { "axios": "^1.13.1", "csv-parse": "^5.5.6", - "csv-reader": "^1.0.12", "dotenv": "^16.3.1", "jwt-decode": "^3.1.2", "openid-client": "^5.6.5", @@ -1024,14 +1023,6 @@ "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.5.6.tgz", "integrity": "sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==" }, - "node_modules/csv-reader": { - "version": "1.0.12", - "resolved": "https://registry.npmjs.org/csv-reader/-/csv-reader-1.0.12.tgz", - "integrity": "sha512-0AAgazKJUywtjvZbclNuovIiQY/WyvojWw15Y2k3kPixE+pDiOFnfg5FcH3CfDqqnrB2f3p5oPAc446EXD01Tw==", - "engines": { - "node": ">=8.0.0" - } - }, "node_modules/dateformat": { "version": "4.6.3", "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-4.6.3.tgz", @@ -4630,11 +4621,6 @@ "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.5.6.tgz", "integrity": "sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==" }, - "csv-reader": { - "version": "1.0.12", - "resolved": "https://registry.npmjs.org/csv-reader/-/csv-reader-1.0.12.tgz", - "integrity": "sha512-0AAgazKJUywtjvZbclNuovIiQY/WyvojWw15Y2k3kPixE+pDiOFnfg5FcH3CfDqqnrB2f3p5oPAc446EXD01Tw==" - }, "dateformat": { "version": "4.6.3", "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-4.6.3.tgz", diff --git a/package.json b/package.json index a5d3f0d..7023ccc 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,6 @@ }, "dependencies": { "axios": "^1.13.1", - "csv-reader": "^1.0.12", "csv-parse": "^5.5.6", "dotenv": "^16.3.1", "jwt-decode": "^3.1.2", diff --git a/src/csv.ts b/src/csv.ts new file mode 100644 index 0000000..f345221 --- /dev/null +++ b/src/csv.ts @@ -0,0 +1,13 @@ +import { AssertionError } from 'assert'; + +export type CsvRow = Record; + +export function assertIsCsvRow(row: unknown): asserts row is CsvRow { + if (row === null + || typeof row !== 'object' + || Object.keys(row).length === 0 + || Object.keys(row).some((key) => typeof key !== 'string') + || Object.values(row).some((value) => typeof value !== 'string')) { + throw new AssertionError({ message: 'Given row is not a CsvRow!' }); + } +} diff --git a/src/generateApplicationFormJson.ts b/src/generateApplicationFormJson.ts index bb79db0..a4a3945 100644 --- a/src/generateApplicationFormJson.ts +++ b/src/generateApplicationFormJson.ts @@ -2,12 +2,13 @@ // The CSV is usually derived from the following URL using xlsx export and `xslx2csv`: // https://docs.google.com/spreadsheets/d/1Ep3_MEIyIbhxJ5TpH5x4Q1fRZqr1CFHXZ_uv3fEOSEk import { - createReadStream, createWriteStream, + readFileSync, } from 'fs'; -import CsvReadableStream from 'csv-reader'; +import { parse as csvParse } from 'csv-parse/sync'; import { parse } from 'ts-command-line-args'; import axios, { AxiosError } from 'axios'; +import { assertIsCsvRow } from './csv'; import { logger } from './logger'; interface Args { @@ -19,10 +20,9 @@ interface Args { apiUrl: string; } -type CsvRow = Record; interface ApplicationFormField { baseFieldId: number; - position: number | string | undefined; + position: number; label: string | undefined; } interface ApplicationForm { @@ -35,7 +35,7 @@ interface BaseField { label: string; shortCode: string; dataType: string; - createdAt: string; + createdAt: Date; } const args = parse({ @@ -47,7 +47,10 @@ const args = parse({ apiUrl: String, }); -const csvInput = createReadStream(args.inputFile, 'utf8'); +const csvParser = csvParse(readFileSync(args.inputFile, 'utf8'), { + columns: true, +}) as unknown[]; + const jsonOutput = createWriteStream(args.outputFile, 'utf8'); const { opportunityId, funder, bearerToken, apiUrl, @@ -67,34 +70,39 @@ axios(`${apiUrl}/baseFields`, { }, }).then((response) => { const fields: BaseField[] = response.data as BaseField[]; - csvInput.pipe( - new CsvReadableStream({ - parseNumbers: true, - parseBooleans: true, - trim: true, - allowQuotes: true, - asObject: true, - }), - ).on('data', (row: CsvRow) => { - const label = `${funder}: field label`; - const id = `${funder}: external ID`; - const pos = `${funder}: form position`; - let field: BaseField | undefined; - if (row[id] !== '') { + const label = `${funder}: field label`; + const pos = `${funder}: form position`; + Promise.all(csvParser.map((row) => { + assertIsCsvRow(row); + let field: BaseField; + if (row[label] !== '') { const shortCode = row['Internal field name']; - field = fields.find((e) => e.shortCode === shortCode); - if (field) { - const applicationFormField: ApplicationFormField = { - baseFieldId: field.id, - position: row[pos] ? '' : (counter += 1), - label: row[label], - }; - applicationForm.fields.push(applicationFormField); + const fieldsFiltered = fields.filter((e) => e.shortCode === shortCode); + if (fieldsFiltered.length === 1 && fieldsFiltered[0] !== undefined) { + [field] = fieldsFiltered; + } else { + const code = shortCode !== undefined ? shortCode : 'undefined'; + throw new Error(`Found ${fieldsFiltered.length} base fields (expected 1): shortCode=${code}`); } + const position = row[pos]; + const applicationFormField: ApplicationFormField = { + baseFieldId: field.id, + position: typeof position === 'number' ? position : (counter += 1), + label: row[label], + }; + return applicationFormField; } - }).on('end', () => { + return undefined; + })).then((applicationFormFields) => { + applicationFormFields.forEach((field) => { + if (field !== undefined) { + applicationForm.fields.push(field); + } + }); jsonOutput.write(JSON.stringify(applicationForm)); jsonOutput.close(); + }).catch((error: unknown) => { + logger.error(`Error creating form fields: ${JSON.stringify(error)}`); }); }).catch((error: AxiosError) => { logger.error({ error }, 'Error getting base fields'); diff --git a/src/generateBaseFieldsInserts.ts b/src/generateBaseFieldsInserts.ts index fdf5386..9d7ac25 100644 --- a/src/generateBaseFieldsInserts.ts +++ b/src/generateBaseFieldsInserts.ts @@ -2,12 +2,15 @@ // The CSV is usually derived from the following URL using xlsx export and `xslx2csv`: // https://docs.google.com/spreadsheets/d/1Ep3_MEIyIbhxJ5TpH5x4Q1fRZqr1CFHXZ_uv3fEOSEk import { - createReadStream, createWriteStream, + readFileSync, } from 'fs'; import { EOL } from 'os'; -import CsvReadableStream from 'csv-reader'; +import { AssertionError } from 'assert'; +import { parse as csvParse } from 'csv-parse/sync'; import { parse } from 'ts-command-line-args'; +import { assertIsCsvRow } from './csv'; +import { logger } from './logger'; interface Args { inputFile: string; @@ -19,38 +22,40 @@ const args = parse({ outputFile: String, }); -interface CsvRow { - Label: string, - 'Internal field name' : string, - Type: string -} - -const csvInput = createReadStream(args.inputFile, 'utf8'); +const csvParser = csvParse(readFileSync(args.inputFile, 'utf8'), { + columns: true, + skip_records_with_empty_values: true, +}) as unknown[]; const sqlOutput = createWriteStream(args.outputFile, 'utf8'); sqlOutput.write(`INSERT INTO base_fields (label, short_code, data_type) VALUES${EOL}`); let firstRowArrived = false; -csvInput.pipe( - new CsvReadableStream({ - parseNumbers: true, - parseBooleans: true, - trim: true, - allowQuotes: true, - asObject: true, - }), -).on('data', (row: CsvRow) => { + +Promise.all(csvParser.map((row) => { + assertIsCsvRow(row); const label = row.Label; + if (typeof label !== 'string') { + throw new AssertionError({ message: 'Expected label to be a string' }); + } const shortCode = row['Internal field name']; + if (typeof shortCode !== 'string') { + throw new AssertionError({ message: 'Expected shortCode to be a string' }); + } const dataType = row.Type; + if (typeof dataType !== 'string') { + throw new AssertionError({ message: 'Expected dataType to be a string' }); + } if (firstRowArrived) { - sqlOutput.write(`,${EOL}('${label}', '${shortCode}', '${dataType}' )`); - } else { - sqlOutput.write(`('${label}', '${shortCode}', '${dataType}' )`); + return `,${EOL}('${label}', '${shortCode}', '${dataType}' )`; } firstRowArrived = true; -}).on('end', () => { + return `('${label}', '${shortCode}', '${dataType}' )`; +})).then((insertStatements) => { + insertStatements.forEach((statement) => sqlOutput.write(statement)); sqlOutput.write(`;${EOL}`); sqlOutput.close(); +}).catch((error: unknown) => { + logger.error(error, 'Error while reading CSV or writing SQL.'); }); diff --git a/src/postProposalVersions.ts b/src/postProposalVersions.ts index b86bdc1..61d1d2b 100644 --- a/src/postProposalVersions.ts +++ b/src/postProposalVersions.ts @@ -1,10 +1,11 @@ // Takes a CSV file, creates JSON bodies for POST /proposalVersions, posts them. import { readFileSync } from 'fs'; -import { AssertionError } from 'assert'; import { parse as csvParse } from 'csv-parse/sync'; import { parse as argParse } from 'ts-command-line-args'; import axios, { AxiosError } from 'axios'; +import { assertIsCsvRow } from './csv'; import { logger } from './logger'; +import type { CsvRow } from './csv'; interface Args { inputFile: string; @@ -15,18 +16,6 @@ interface Args { apiUrl: string; } -type CsvRow = Record; - -function assertIsCsvRow(row: unknown): asserts row is CsvRow { - if (row === null - || typeof row !== 'object' - || Object.keys(row).length === 0 - || Object.keys(row).some((key) => typeof key !== 'string') - || Object.values(row).some((value) => typeof value !== 'string')) { - throw new AssertionError({ message: 'Given row is not a CsvRow!' }); - } -} - interface Applicant { readonly id: number; externalId: string;