diff --git a/data/marc-rules.json b/data/marc-rules.json
new file mode 100644
index 00000000..8e30e48f
--- /dev/null
+++ b/data/marc-rules.json
@@ -0,0 +1,62 @@
+[
+  {
+    "marcIndicatorRegExp": "3610 ",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "365..",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "5410.",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "541 .",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "5420 ",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "5610.",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "561 .",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "5830.",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "583 .",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  },
+  {
+    "marcIndicatorRegExp": "5900.",
+    "subfieldSpec": { "subfields": [], "directive": "include" },
+    "label": "",
+    "directive": "exclude"
+  }
+]
diff --git a/lib/annotated-marc-serializer.js b/lib/annotated-marc-serializer.js
index 99e717a5..24ace60f 100644
--- a/lib/annotated-marc-serializer.js
+++ b/lib/annotated-marc-serializer.js
@@ -37,6 +37,7 @@
 
 const arrayUnique = require('./util').arrayUnique
 const relatorMappings = require('../data/relator-mappings.json')
+const { varFieldMatches, buildSourceWithMasking } = require('./marc-util')
 
 class AnnotatedMarcSerializer {
 }
@@ -133,32 +134,13 @@ AnnotatedMarcSerializer.matchingMarcFields = function (bib, rule) {
  *
  * @return {boolean}
  */
-AnnotatedMarcSerializer.varFieldMatches = function (field, rule) {
-  const fieldMarcIndicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}`
-  return rule.marcIndicatorRegExp.test(fieldMarcIndicator) &&
-    rule.fieldTag === field.fieldTag
-}
+AnnotatedMarcSerializer.varFieldMatches = varFieldMatches
 
 /**
- * Given a varField, returns a copy with any hidden subfield content replaced
- * with "[redacted]" based on given rule
+ * Given a varField, returns a copy with any hidden subfields removed
+ * (no longer replaced with "[redacted]") based on given rule
  */
-AnnotatedMarcSerializer.buildSourceWithMasking = function (field, rule) {
-  return Object.assign({}, field, {
-    subfields: (field.subfields || [])
-      .map((subfield) => {
-        let subfieldContent = subfield.content
-        // If directive is 'include' and subfield not included
-        // .. or directive is 'exclude', but subfield included,
-        // [redact] it:
-        if ((rule.subfieldSpec.directive === 'include' && rule.subfieldSpec.subfields.indexOf(subfield.tag) < 0) ||
-          (rule.subfieldSpec.directive === 'exclude' && rule.subfieldSpec.subfields.indexOf(subfield.tag) >= 0)) {
-          subfieldContent = '[redacted]'
-        }
-        return Object.assign({}, subfield, { content: subfieldContent })
-      })
-  })
-}
+AnnotatedMarcSerializer.buildSourceWithMasking = buildSourceWithMasking
 
 /**
  * Get prefix for a marctag & subfield, given a previous subfield (if avail.)
diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js
new file mode 100644
index 00000000..426f4976
--- /dev/null
+++ b/lib/marc-serializer.js
@@ -0,0 +1,170 @@
+/**
+ * @typedef {object} MarcRuleSubfieldSpec
+ * @property {array} subfields - Array of subfields to match for suppression
+ * @property {string} directive - Indicates whether the matching subfields
+ *   should be "include"d or "exclude"d
+ */
+/**
+ * @typedef {object} MarcRule
+ * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y')
+ * @property {string} marcIndicatorRegExp - Stringified regex for matching a
+ *   VarField tag joined to 1st and 2nd indicators
+ * @property {MarcRuleSubfieldSpec} subfieldSpec - How to match subfields
+ * @property {string} directive - Whether to include/exclude if matched.
+ */
+
+/**
+ * @typedef {object} SubField
+ * @property {string} tag - Identifying tag (e.g. '6', 'a')
+ * @property {string} content - Value of subfield
+ */
+
+/**
+ * @typedef {object} VarField
+ * @property {string} marcTag - Three digit number classifying field (e.g. '100')
+ * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y')
+ * @property {string} content - Root level content (usually null/ignored)
+ * @property {array} subfields
+ * @property {string|null} ind1 - First indicator character (space if blank)
+ * @property {string|null} ind2 - Second indicator character (space if blank)
+ */
+
+/**
+ * @typedef {object} SerializedBib
+ * @property {string} id - Bib ID
+ * @property {string} nyplSource - MARC source
+ * @property {array} fields - Array of varFields after suppression
+ */
+
+/**
+ * @typedef {object} SerializedMarc
+ * @property {SerializedBib} bib - The serialized bib object containing varFields
+ */
+
+const { varFieldMatches } = require('./marc-util')
+
+class MarcSerializer {}
+
+// Load rules
+MarcSerializer.mappingRules = require('../data/marc-rules.json')
+  .map((rule) => {
+    return Object.assign({}, rule, {
+      marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp)
+    })
+  })
+
+/**
+ * Returns true if a field matches a given MARC rule
+ * @param {VarField} field - MARC field to test
+ * @param {MarcRule} rule - Rule to match against
+ * @returns {boolean}
+ */
+MarcSerializer.varFieldMatches = varFieldMatches
+
+/**
+ * Returns a field's marcTag joined to its two indicators (a missing or empty
+ * indicator is rendered as a single space), e.g. '7001 '
+ * @param {VarField} field - MARC field to describe
+ * @returns {string}
+ */
+MarcSerializer.describeField = function (field) {
+  return `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}`
+}
+
+/**
+ * Finds linked 880 fields (parallel scripts) for a given field
+ * @param {Bib} bib - Bib object containing varFields
+ * @param {VarField} sourceField - Field to find parallels for
+ * @returns {Array} Array of parallel 880 fields
+ */
+MarcSerializer.findParallelFields = function (bib, sourceField) {
+  const linkNumbers = extractLinkingNumbers(sourceField)
+  if (linkNumbers.length === 0) return []
+
+  return (bib.varFields || []).filter((field) =>
+    isLinked880Field(field, linkNumbers)
+  )
+}
+
+/**
+ * Returns true for a MARC linking subfield ($6) that has string content
+ */
+function isLinkingSubfield (subfield) {
+  return subfield.tag === '6' && typeof subfield.content === 'string'
+}
+
+/**
+ * Extracts linking numbers from subfield 6, removing the 880- prefix
+ */
+function extractLinkingNumbers (varField) {
+  return (varField.subfields || [])
+    .filter(isLinkingSubfield)
+    .map((subfield) => subfield.content.replace(/^880-/, ''))
+}
+
+/**
+ * Determines whether a field is an 880 field linked to any of the given numbers
+ */
+function isLinked880Field (field, linkNumbers) {
+  if (field.marcTag !== '880' || !field.subfields) return false
+
+  const fieldLinks = field.subfields
+    .filter(isLinkingSubfield)
+    .map((subfield) => subfield.content)
+
+  return fieldLinks.some((link) =>
+    linkNumbers.some((linkNumber) => isMatchingLink(link, linkNumber))
+  )
+}
+
+/**
+ * Checks whether a link contains the link number at position 4
+ */
+function isMatchingLink (link, linkNumber) {
+  return link.indexOf(linkNumber) === 4
+}
+
+/**
+ * Serializes a bib, dropping every field excluded by MarcSerializer.mappingRules
+ * together with any 880 (parallel script) field linked to an excluded field.
+ * The input bib is not modified.
+ * @param {Bib} bib - Bib to serialize
+ * @returns {SerializedMarc} Serialized bib
+ */
+MarcSerializer.serialize = function (bib) {
+  const varFields = bib.varFields || []
+
+  // First pass: find the excluded fields and collect their link numbers up
+  // front, so that a parallel 880 is dropped even when it precedes its source
+  // NOTE(review): a matched rule's subfieldSpec is not applied here - only
+  // whole-field exclusion happens; confirm subfield suppression is out of scope
+  const excludedFields = new Set()
+  const excludedLinkNumbers = new Set()
+  varFields.forEach((field) => {
+    // Only the first matching rule decides what happens to a field
+    const matchingRule = MarcSerializer.mappingRules.find((rule) =>
+      MarcSerializer.varFieldMatches(field, rule)
+    )
+    if (!matchingRule || matchingRule.directive !== 'exclude') return
+
+    excludedFields.add(field)
+    extractLinkingNumbers(field).forEach((ln) => excludedLinkNumbers.add(ln))
+  })
+  const linkNumbers = Array.from(excludedLinkNumbers)
+
+  // Second pass: keep everything that is neither excluded itself nor an 880
+  // parallel of an excluded field
+  const serializedVarFields = varFields.filter((field) =>
+    !excludedFields.has(field) && !isLinked880Field(field, linkNumbers)
+  )
+
+  return {
+    bib: {
+      id: bib.id,
+      nyplSource: bib.nyplSource,
+      fields: serializedVarFields
+    }
+  }
+}
+
+module.exports = MarcSerializer
diff --git a/lib/marc-util.js b/lib/marc-util.js
new file mode 100644
index 00000000..b892908d
--- /dev/null
+++ b/lib/marc-util.js
@@ -0,0 +1,43 @@
+/**
+ * Returns true if a field matches a given MARC rule
+ * @param {VarField} field
+ * @param {MarcRule} rule
+ * @returns {boolean}
+ */
+function varFieldMatches (field, rule) {
+  const indicator = `${field.marcTag || ''}${field.ind1 || ' '}${field.ind2 || ' '}`
+
+  if (rule.fieldTag && rule.fieldTag !== field.fieldTag) {
+    return false
+  }
+
+  return rule.marcIndicatorRegExp.test(indicator)
+}
+
+/**
+ * Returns a copy of a varField with removed subfields according to the rule
+ * @param {VarField} field
+ * @param {MarcRule} rule
+ * @returns {VarField}
+ */
+function buildSourceWithMasking (field, rule) {
+  return {
+    ...field,
+    subfields: (field.subfields || []).filter((subfield) => {
+      if (
+        (rule.subfieldSpec.directive === 'include' &&
+          !rule.subfieldSpec.subfields.includes(subfield.tag)) ||
+        (rule.subfieldSpec.directive === 'exclude' &&
+          rule.subfieldSpec.subfields.includes(subfield.tag))
+      ) {
+        return false
+      }
+      return true
+    })
+  }
+}
+
+module.exports = {
+  varFieldMatches,
+  buildSourceWithMasking
+}
diff --git a/lib/models/Location.js b/lib/models/Location.js
index 49592f09..b73ab0f0 100644
--- a/lib/models/Location.js
+++ b/lib/models/Location.js
@@ -9,7 +9,7 @@ class Location {
   }
 
   get deliverableToResolution () {
-    if (this.nyplCoreLocation) {
+    if (this.nyplCoreLocation?.deliverableToResolution) {
       return this.nyplCoreLocation.deliverableToResolution
     } else if (this.recapCustomerCode) return 'recap-customer-code'
   }
diff --git a/lib/resources.js b/lib/resources.js
index f342cd3b..959f019c 100644
--- a/lib/resources.js
+++ b/lib/resources.js
@@ -8,6 +8,7 @@ const AggregationSerializer = require('./jsonld_serializers.js').AggregationSeri
 const ItemResultsSerializer = require('./jsonld_serializers.js').ItemResultsSerializer
 const LocationLabelUpdater = require('./location_label_updater')
 const AnnotatedMarcSerializer = require('./annotated-marc-serializer')
+const MarcSerializer = require('./marc-serializer')
 const { makeNyplDataApiClient } = require('./data-api-client')
 const { IndexSearchError, IndexConnectionError } = require('./errors')
 
@@ -231,6 +232,30 @@ module.exports = function (app, _private = null) {
       .then(AnnotatedMarcSerializer.serialize)
   }
 
+  // Get a single raw marc:
+  app.resources.marc = async function (params, opts) {
+    // Convert discovery id to nyplSource and un-prefixed id:
+    const nyplSourceMapper = await NyplSourceMapper.instance()
+    const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {}
+
+    if (!id || !nyplSource) {
+      throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`)
+    }
+
+    app.logger.debug('Resources#marc', { id, nyplSource })
+
+    return makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`)
+      .then((resp) => {
+        // need to check that the query actually found an entry
+        if (!resp.data) {
+          throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`)
+        } else {
+          return resp.data
+        }
+      })
+      .then(MarcSerializer.serialize)
+  }
+
   function itemsByFilter (filter, opts) {
     opts = Object.assign({
       _source: null
diff --git a/routes/resources.js b/routes/resources.js
index 8e1dab30..dbc159d0 100644
--- a/routes/resources.js
+++ b/routes/resources.js
@@ -106,6 +106,8 @@ module.exports = function (app) {
 
     if (req.params.ext === 'annotated-marc') {
       handler = app.resources.annotatedMarc
+    } else if (req.params.ext === 'marc') {
+      handler = app.resources.marc
     }
 
     return handler(params, { baseUrl: app.baseUrl }, req)
diff --git a/test/annotated-marc-rules.test.js b/test/annotated-marc-rules.test.js
index 18723665..a3086611 100644
--- a/test/annotated-marc-rules.test.js
+++ b/test/annotated-marc-rules.test.js
@@ -329,13 +329,10 @@ describe('Annotated Marc Rules', function () {
       expect(maskedSource).to.be.a('object')
       expect(maskedSource.marcTag).to.equal('245')
       expect(maskedSource.subfields).to.be.a('array')
-      expect(maskedSource.subfields).to.have.lengthOf(2)
+      expect(maskedSource.subfields).to.have.lengthOf(1)
       expect(maskedSource.subfields[0]).to.be.a('object')
       expect(maskedSource.subfields[0].tag).to.equal('a')
       expect(maskedSource.subfields[0].content).to.equal('Razvedchik')
-      expect(maskedSource.subfields[1]).to.be.a('object')
-      expect(maskedSource.subfields[1].tag).to.equal('h')
-      expect(maskedSource.subfields[1].content).to.equal('[redacted]')
     })
 
     it('should mask subfields not included', function () {
@@ -360,13 +357,10 @@ describe('Annotated Marc Rules', function () {
       expect(maskedSource).to.be.a('object')
       expect(maskedSource.marcTag).to.equal('245')
       expect(maskedSource.subfields).to.be.a('array')
-      expect(maskedSource.subfields).to.have.lengthOf(2)
+      expect(maskedSource.subfields).to.have.lengthOf(1)
       expect(maskedSource.subfields[0]).to.be.a('object')
-      expect(maskedSource.subfields[0].tag).to.equal('a')
-      expect(maskedSource.subfields[0].content).to.equal('[redacted]')
-      expect(maskedSource.subfields[1]).to.be.a('object')
-      expect(maskedSource.subfields[1].tag).to.equal('h')
-      expect(maskedSource.subfields[1].content).to.equal('[microform] :')
+      expect(maskedSource.subfields[0].tag).to.equal('h')
+      expect(maskedSource.subfields[0].content).to.equal('[microform] :')
     })
   })
 
diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js
new file mode 100644
index 00000000..206a5a74
--- /dev/null
+++ b/test/marc-serializer.test.js
@@ -0,0 +1,178 @@
+const { expect } = require('chai')
+const MarcSerializer = require('../lib/marc-serializer')
+
+// Mock mapping rules
+MarcSerializer.mappingRules = [
+  // 700 to be excluded
+  {
+    marcIndicatorRegExp: /^700/,
+    directive: 'exclude',
+    subfieldSpec: null
+  },
+  {
+    fieldTag: 'y',
+    marcIndicatorRegExp: /^856/,
+    directive: 'include',
+    subfieldSpec: {
+      subfields: [],
+      directive: 'include'
+    }
+  }
+]
+
+const sampleBib = {
+  id: 'testId',
+  nyplSource: 'testSource',
+  varFields: [
+    {
+      fieldTag: 'a',
+      marcTag: '100',
+      content: null,
+      ind1: '1',
+      ind2: ' ',
+      subfields: [
+        { tag: 'a', content: 'Porter, Bertha,' },
+        { tag: 'd', content: '1852-1941.' }
+      ]
+    },
+    {
+      fieldTag: 'y',
+      marcTag: '008',
+      content: ' cyyyy2011nyua f 000 faeng dnam a ',
+      ind1: '',
+      ind2: '',
+      subfields: []
+    },
+    {
+      fieldTag: 't',
+      marcTag: '245',
+      content: null,
+      ind1: '1',
+      ind2: '0',
+      subfields: [
+        { tag: 'a', content: 'Topographical bibliography of ancient Egyptian hieroglyphic texts, reliefs, and paintings /' },
+        { tag: 'c', content: 'by Bertha Porter and Rosalind L.B. Moss.' }
+      ]
+    },
+    {
+      fieldTag: 'b',
+      marcTag: '700',
+      content: null,
+      ind1: '1',
+      ind2: ' ',
+      subfields: [
+        { tag: 'a', content: 'Moss, Rosalind L. B.' },
+        { tag: 'q', content: '(Rosalind Louisa Beaufort)' }
+      ]
+    },
+    {
+      fieldTag: 'y',
+      marcTag: '856',
+      content: null,
+      subfields: [
+        { tag: 'u', content: 'This should be redacted' },
+        { tag: 'z', content: 'This is ok' }
+      ],
+      ind1: '4',
+      ind2: '0'
+    },
+    {
+      fieldTag: '_',
+      marcTag: null,
+      content: '00000cam 2200769Ia 4500',
+      subfields: [],
+      ind1: null,
+      ind2: null
+    }
+  ]
+}
+
+const sampleBibWithExcludedSourceAndParallel = {
+  id: 'testId',
+  nyplSource: 'testSource',
+  varFields: [
+    {
+      marcTag: '700',
+      ind1: '1',
+      ind2: ' ',
+      subfields: [
+        { tag: '6', content: '880-02/$1' },
+        { tag: 'a', content: 'Some name' }
+      ]
+    },
+    {
+      marcTag: '880',
+      ind1: '1',
+      ind2: ' ',
+      subfields: [
+        { tag: '6', content: '700-02/$1' },
+        { tag: 'a', content: '並列表記' }
+      ]
+    }
+  ]
+}
+
+describe('MarcSerializer', () => {
+  describe('serialize', () => {
+    let serialized
+    before(() => {
+      serialized = MarcSerializer.serialize(sampleBib)
+    })
+
+    it('preserves leader field', () => {
+      const leader = serialized.bib.fields.find(f => f.fieldTag === '_')
+      expect(leader.content).to.equal('00000cam 2200769Ia 4500')
+    })
+
+    it('preserves non-suppressed fields', () => {
+      const field100 = serialized.bib.fields.find(f => f.marcTag === '100')
+      expect(field100.subfields.map(sf => sf.content)).to.include('Porter, Bertha,')
+    })
+
+    it('keeps surviving fields present', () => {
+      const tags = serialized.bib.fields.map(f => f.marcTag)
+      // Null is the leader, 700 is removed
+      expect(tags).to.include.members([null, '100', '245', '856'])
+      expect(tags).to.not.include('700')
+    })
+  })
+
+  describe('serialize removes parallel 880 when source field is excluded', () => {
+    let serialized
+    before(() => {
+      serialized = MarcSerializer.serialize(sampleBibWithExcludedSourceAndParallel)
+    })
+
+    it('removes the source 700 field', () => {
+      const field700 = serialized.bib.fields.find(f => f.marcTag === '700')
+      expect(field700).to.equal(undefined)
+    })
+
+    it('removes the linked 880 field', () => {
+      const field880 = serialized.bib.fields.find(f => f.marcTag === '880')
+      expect(field880).to.equal(undefined)
+    })
+
+    it('removes a linked 880 field that precedes its excluded source', () => {
+      const reordered = Object.assign({}, sampleBibWithExcludedSourceAndParallel, {
+        varFields: sampleBibWithExcludedSourceAndParallel.varFields.slice().reverse()
+      })
+      expect(MarcSerializer.serialize(reordered).bib.fields).to.have.lengthOf(0)
+    })
+  })
+
+  describe('findParallelFields', () => {
+    it('returns empty array when no 880 fields are present', () => {
+      const field100 = sampleBib.varFields.find(f => f.marcTag === '100')
+      const parallels = MarcSerializer.findParallelFields(sampleBib, field100)
+      expect(parallels).to.be.an('array')
+      expect(parallels).to.have.lengthOf(0)
+    })
+    it('returns correct parallel 880 for a field', () => {
+      const field700 = sampleBibWithExcludedSourceAndParallel.varFields.find(f => f.marcTag === '700')
+      const parallels = MarcSerializer.findParallelFields(sampleBibWithExcludedSourceAndParallel, field700)
+      expect(parallels).to.have.lengthOf(1)
+      expect(parallels[0].marcTag).to.equal('880')
+    })
+  })
+})