@@ -4,16 +4,12 @@ import fsp from 'fs/promises';
44import fs from 'fs' ;
55import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type' ;
66import { MongoChatFileSchema , MongoDatasetFileSchema } from './schema' ;
7- import { detectFileEncoding , detectFileEncodingByPath } from '@fastgpt/global/common/file/tools' ;
8- import { CommonErrEnum } from '@fastgpt/global/common/error/code/common' ;
9- import { readRawContentByFileBuffer } from '../read/utils' ;
10- import { computeGridFsChunSize , gridFsStream2Buffer , stream2Encoding } from './utils' ;
7+ import { detectFileEncodingByPath } from '@fastgpt/global/common/file/tools' ;
8+ import { computeGridFsChunSize , stream2Encoding } from './utils' ;
119import { addLog } from '../../system/log' ;
12- import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools' ;
1310import { Readable } from 'stream' ;
14- import { addRawTextBuffer , getRawTextBuffer } from '../../buffer/rawText/controller' ;
15- import { addMinutes } from 'date-fns' ;
1611import { retryFn } from '@fastgpt/global/common/system/utils' ;
12+ import { getS3DatasetSource } from '../../s3/sources/dataset' ;
1713
1814export function getGFSCollection ( bucket : `${BucketNameEnum } `) {
1915 MongoDatasetFileSchema ;
@@ -162,11 +158,17 @@ export async function delFileByFileIdList({
162158 fileIdList : string [ ] ;
163159} ) : Promise < any > {
164160 return retryFn ( async ( ) => {
161+ const s3DatasetSource = getS3DatasetSource ( ) ;
162+
165163 const bucket = getGridBucket ( bucketName ) ;
166164
167165 for await ( const fileId of fileIdList ) {
168166 try {
169- await bucket . delete ( new Types . ObjectId ( String ( fileId ) ) ) ;
167+ if ( s3DatasetSource . isDatasetObjectKey ( fileId ) ) {
168+ await s3DatasetSource . deleteDatasetFileByKey ( fileId ) ;
169+ } else {
170+ await bucket . delete ( new Types . ObjectId ( String ( fileId ) ) ) ;
171+ }
170172 } catch ( error : any ) {
171173 if ( typeof error ?. message === 'string' && error . message . includes ( 'File not found' ) ) {
172174 addLog . warn ( 'File not found' , { fileId } ) ;
@@ -189,78 +191,3 @@ export async function getDownloadStream({
189191
190192 return bucket . openDownloadStream ( new Types . ObjectId ( fileId ) ) ;
191193}
192-
193- export const readFileContentFromMongo = async ( {
194- teamId,
195- tmbId,
196- bucketName,
197- fileId,
198- customPdfParse = false ,
199- getFormatText,
200- usageId
201- } : {
202- teamId : string ;
203- tmbId : string ;
204- bucketName : `${BucketNameEnum } `;
205- fileId : string ;
206- customPdfParse ?: boolean ;
207- getFormatText ?: boolean ; // 数据类型都尽可能转化成 markdown 格式
208- usageId ?: string ;
209- } ) : Promise < {
210- rawText : string ;
211- filename : string ;
212- } > => {
213- const bufferId = `${ String ( fileId ) } -${ customPdfParse } ` ;
214- // read buffer
215- const fileBuffer = await getRawTextBuffer ( bufferId ) ;
216- if ( fileBuffer ) {
217- return {
218- rawText : fileBuffer . text ,
219- filename : fileBuffer ?. sourceName
220- } ;
221- }
222-
223- const [ file , fileStream ] = await Promise . all ( [
224- getFileById ( { bucketName, fileId } ) ,
225- getDownloadStream ( { bucketName, fileId } )
226- ] ) ;
227- if ( ! file ) {
228- return Promise . reject ( CommonErrEnum . fileNotFound ) ;
229- }
230-
231- const extension = parseFileExtensionFromUrl ( file ?. filename ) ;
232-
233- const start = Date . now ( ) ;
234- const fileBuffers = await gridFsStream2Buffer ( fileStream ) ;
235- addLog . debug ( 'get file buffer' , { time : Date . now ( ) - start } ) ;
236-
237- const encoding = file ?. metadata ?. encoding || detectFileEncoding ( fileBuffers ) ;
238-
239- // Get raw text
240- const { rawText } = await readRawContentByFileBuffer ( {
241- customPdfParse,
242- usageId,
243- getFormatText,
244- extension,
245- teamId,
246- tmbId,
247- buffer : fileBuffers ,
248- encoding,
249- metadata : {
250- relatedId : fileId
251- }
252- } ) ;
253-
254- // Add buffer
255- addRawTextBuffer ( {
256- sourceId : bufferId ,
257- sourceName : file . filename ,
258- text : rawText ,
259- expiredTime : addMinutes ( new Date ( ) , 20 )
260- } ) ;
261-
262- return {
263- rawText,
264- filename : file . filename
265- } ;
266- } ;
0 commit comments