-
Notifications
You must be signed in to change notification settings - Fork 4
2gb upload limit fix #352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
2gb upload limit fix #352
Changes from all commits
a6e9c9e
6dafa4c
ee3769b
8950899
ffe00e3
81efecb
bf00abf
aecec96
26122a8
f7cf790
1915ce5
f76c0a5
52f84ca
f0678e5
0aea737
b010f0e
6693d10
0116923
60ff6f9
bd5515c
38ab536
698fb76
82b040a
76c78a3
5261c38
eee9028
65f5602
d766da9
8e0d752
4b432ae
c80aea4
1ff7bd5
d314a62
96f6a75
336b33c
5510ca3
1e24f81
28a7ffd
5944a5f
ca88b20
981285f
599fa28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -5,6 +5,7 @@ import Config from "./Config"; | |||||||
| import { DatastreamParameters, Fedora } from "../services/Fedora"; | ||||||||
| import FedoraDataCollector from "../services/FedoraDataCollector"; | ||||||||
| import { execSync } from "child_process"; | ||||||||
| import crypto = require("crypto"); | ||||||||
| import { Agent } from "../services/interfaces"; | ||||||||
|
|
||||||||
| export interface ObjectParameters { | ||||||||
|
|
@@ -76,8 +77,34 @@ export class FedoraObject { | |||||||
| await this.fedora.deleteDatastreamTombstone(this.pid, stream); | ||||||||
| } | ||||||||
|
|
||||||||
| async computeDigestHeaderForFile(filename: string): Promise<string> { | ||||||||
| // Compute digest by streaming the file once (avoids loading the whole file into memory) | ||||||||
| const md5Hash = crypto.createHash("md5"); | ||||||||
| const sha512Hash = crypto.createHash("sha512"); | ||||||||
| await new Promise<void>((resolve, reject) => { | ||||||||
| const rs = fs.createReadStream(filename); | ||||||||
| rs.on("data", (chunk: Buffer) => { | ||||||||
| md5Hash.update(chunk); | ||||||||
| sha512Hash.update(chunk); | ||||||||
| }); | ||||||||
| rs.on("end", () => resolve()); | ||||||||
| rs.on("error", (err) => reject(err)); | ||||||||
| }); | ||||||||
| const md5 = md5Hash.digest("hex"); | ||||||||
| const sha512 = sha512Hash.digest("hex"); | ||||||||
| const digestHeader = `md5=${md5}, sha-512=${sha512}`; | ||||||||
| return digestHeader; | ||||||||
|
Comment on lines
+95
to
+96
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could simplify to a direct return since the variable isn't really used for anything:
Suggested change
|
||||||||
| } | ||||||||
|
|
||||||||
| async addDatastreamFromFile(filename: string, stream: string, mimeType: string): Promise<void> { | ||||||||
| await this.addDatastreamFromStringOrBuffer(fs.readFileSync(filename), stream, mimeType, [201]); | ||||||||
| // Create a fresh read stream for the upload | ||||||||
| const readStream = fs.createReadStream(filename); | ||||||||
| const digestHeader = await this.computeDigestHeaderForFile(filename); | ||||||||
| const params: DatastreamParameters = { | ||||||||
| mimeType: mimeType, | ||||||||
| logMessage: "Initial Ingest addDatastream - " + stream, | ||||||||
| }; | ||||||||
| await this.fedora.addDatastream(this.pid, stream, params, readStream, [201], digestHeader); | ||||||||
| } | ||||||||
|
|
||||||||
| async updateDatastreamFromFile(filename: string, stream: string, mimeType: string): Promise<void> { | ||||||||
|
|
@@ -106,6 +133,16 @@ export class FedoraObject { | |||||||
| logMessage: "Initial Ingest addDatastream - MASTER-MD", | ||||||||
| }; | ||||||||
| const fitsXml = this.fitsMasterMetadata(filename); | ||||||||
|
|
||||||||
| // Check if MASTER-MD exists and delete it if it does | ||||||||
| try { | ||||||||
| const checkResponse = await this.fedora.getDatastream(this.pid, "MASTER-MD"); | ||||||||
| if (checkResponse.statusCode === 200) { | ||||||||
| await this.deleteDatastream("MASTER-MD"); | ||||||||
| } | ||||||||
| } catch (e) { | ||||||||
| // No existing MASTER-MD to delete | ||||||||
| } | ||||||||
| await this.addDatastream("MASTER-MD", params, fitsXml, [201, 204]); | ||||||||
| } | ||||||||
|
|
||||||||
|
|
@@ -221,6 +258,10 @@ export class FedoraObject { | |||||||
| return this.fedora.getDatastreamAsBuffer(this.pid, datastream); | ||||||||
| } | ||||||||
|
|
||||||||
| async downloadDatastreamToTempFile(datastream: string, treatMissingAsEmpty = false): Promise<string> { | ||||||||
| return this.fedora.downloadDatastreamToTempFile(this.pid, datastream, treatMissingAsEmpty); | ||||||||
| } | ||||||||
|
|
||||||||
| async getDatastreamMetadata(datastream: string): Promise<string> { | ||||||||
| return await this.fedora.getRdf(`${this.pid}/${datastream}/fcr:metadata`); | ||||||||
| } | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,8 @@ import xmlescape = require("xml-escape"); | |
| import { HttpError } from "../models/HttpError"; | ||
| import winston = require("winston"); | ||
| import SolrCache from "./SolrCache"; | ||
| import fs = require("fs"); | ||
| import tmp = require("tmp"); | ||
|
|
||
| export interface DatastreamParameters { | ||
| dsLabel?: string; | ||
|
|
@@ -58,7 +60,7 @@ export class Fedora { | |
| protected _request( | ||
| method = "get", | ||
| _path = "/", | ||
| data: string | Buffer = null, | ||
| data: string | Buffer | NodeJS.ReadableStream = null, | ||
| _options: Record<string, unknown> = {}, | ||
| ): Promise<NeedleResponse> { | ||
| const path = _path[0] == "/" ? _path.slice(1) : _path; | ||
|
|
@@ -70,7 +72,11 @@ export class Fedora { | |
| password: this.config.fedoraPassword, | ||
| }; | ||
| const options = Object.assign({}, auth, _options); | ||
| return http(method, url, data, options); | ||
|
|
||
| return http(method, url, data, options).catch((err) => { | ||
| console.error(`Request failed for ${method.toUpperCase()} ${url}:`, err); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are a number of |
||
| throw err; | ||
| }); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -201,6 +207,65 @@ export class Fedora { | |
| ); | ||
| } | ||
|
|
||
| /** | ||
| * Download a datastream directly to a temporary file to avoid buffering | ||
| * large files into memory. | ||
| * | ||
| * @param pid Record id | ||
| * @param datastream Which stream to request | ||
| * @param treatMissingAsEmpty If true, return empty temp file on 404 | ||
| */ | ||
| async downloadDatastreamToTempFile(pid: string, datastream: string, treatMissingAsEmpty = false): Promise<string> { | ||
| const url = this.config.restBaseUrl + "/" + pid + "/" + datastream; | ||
| const auth = { | ||
| username: this.config.fedoraUsername, | ||
| password: this.config.fedoraPassword, | ||
| }; | ||
|
|
||
| return new Promise((resolve, reject) => { | ||
| const tmpobj = tmp.fileSync(); | ||
| const writeStream = fs.createWriteStream(tmpobj.name); | ||
|
|
||
| const req = http.get(url, auth); | ||
|
|
||
| req.on("response", (res) => { | ||
| if (res.statusCode === 200) { | ||
| req.pipe(writeStream); | ||
| writeStream.on("finish", () => { | ||
| resolve(tmpobj.name); | ||
| }); | ||
| writeStream.on("error", (err) => { | ||
| try { | ||
| fs.unlinkSync(tmpobj.name); | ||
| } catch (e) { | ||
| console.error(e); | ||
| } | ||
| reject(err); | ||
| }); | ||
| } else if (res.statusCode === 404 && treatMissingAsEmpty) { | ||
| // create empty file and return its path | ||
| writeStream.end(() => resolve(tmpobj.name)); | ||
| } else { | ||
| try { | ||
| fs.unlinkSync(tmpobj.name); | ||
| } catch (e) { | ||
| console.error(e); | ||
| } | ||
| reject(new Error("Unexpected response for " + pid + "/" + datastream + ": " + res.statusCode)); | ||
| } | ||
| }); | ||
|
|
||
| req.on("error", (err) => { | ||
| try { | ||
| fs.unlinkSync(tmpobj.name); | ||
| } catch (e) { | ||
| console.error(e); | ||
| } | ||
| reject(err); | ||
| }); | ||
| }); | ||
| } | ||
|
|
||
| /** | ||
| * Get DC datastream from Fedora | ||
| * | ||
|
|
@@ -264,18 +329,31 @@ export class Fedora { | |
| stream: string, | ||
| mimeType: string, | ||
| expectedStatus = [201], | ||
| data: string | Buffer, | ||
| data: string | Buffer | NodeJS.ReadableStream, | ||
| linkHeader = "", | ||
| precomputedDigest = "", | ||
| ): Promise<void> { | ||
| this.cache.purgeFromCacheIfEnabled(pid); | ||
| const md5 = crypto.createHash("md5").update(data).digest("hex"); | ||
| const sha = crypto.createHash("sha512").update(data).digest("hex"); | ||
| const headers: Record<string, string> = { | ||
| "Overwrite-Tombstone": "true", | ||
| "Content-Disposition": 'attachment; filename="' + stream + '"', | ||
| "Content-Type": mimeType, | ||
| Digest: "md5=" + md5 + ", sha-512=" + sha, | ||
| }; | ||
|
|
||
| // If caller supplied a precomputed digest (for streaming upload), use it. | ||
| if (precomputedDigest && precomputedDigest.length > 0) { | ||
| headers.Digest = precomputedDigest; | ||
| } else { | ||
| // For string/Buffer payloads, compute digests here. | ||
| if (typeof data === "string" || Buffer.isBuffer(data)) { | ||
| const md5 = crypto.createHash("md5").update(data).digest("hex"); | ||
demiankatz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| const sha = crypto.createHash("sha512").update(data).digest("hex"); | ||
| headers.Digest = "md5=" + md5 + ", sha-512=" + sha; | ||
| } else { | ||
| // No precomputed digest and data is a stream — cannot compute here. | ||
| throw new Error("Streaming data requires a precomputed digest header to be provided"); | ||
| } | ||
| } | ||
| const options = { headers: headers }; | ||
| if (linkHeader.length > 0) { | ||
| options.headers.Link = linkHeader; | ||
|
|
@@ -306,13 +384,22 @@ export class Fedora { | |
| pid: string, | ||
| stream: string, | ||
| params: DatastreamParameters, | ||
| data: string | Buffer, | ||
| data: string | Buffer | NodeJS.ReadableStream, | ||
| expectedStatus = [201], | ||
| precomputedDigest = "", | ||
| ): Promise<void> { | ||
| this.cache.purgeFromCacheIfEnabled(pid); | ||
|
|
||
| // First create the stream: | ||
| await this.putDatastream(pid, stream, params.mimeType, expectedStatus, data, params.linkHeader ?? ""); | ||
| await this.putDatastream( | ||
| pid, | ||
| stream, | ||
| params.mimeType, | ||
| expectedStatus, | ||
| data, | ||
| params.linkHeader ?? "", | ||
| precomputedDigest, | ||
| ); | ||
|
|
||
| // Now set appropriate metadata: | ||
| const writer = new N3.Writer({ format: "text/turtle" }); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.