Skip to content

Commit d03c611

Browse files
committed
fix: remove parsed images' TTL
1 parent 8e8d492 commit d03c611

File tree

13 files changed

+146
-71
lines changed

13 files changed

+146
-71
lines changed

packages/service/common/file/read/utils.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,15 @@ export const readS3FileContentByBuffer = async ({
180180
base64Img: `data:${item.mime};base64,${item.base64}`,
181181
uploadKey: `${prefix}/${item.uuid}.${ext}`,
182182
mimetype: Mimes[ext as keyof typeof Mimes],
183-
filename: `${item.uuid}.${ext}`,
183+
filename: `${item.uuid}${ext}`,
184184
expiredTime
185185
});
186186
} catch (error) {
187187
return `[Image Upload Failed: ${item.uuid}]`;
188188
}
189189
})();
190190
rawText = rawText.replace(item.uuid, src);
191+
// rawText = rawText.replace(item.uuid, jwtSignS3ObjectKey(src, addDays(new Date(), 90)));
191192
if (formatText) {
192193
formatText = formatText.replace(item.uuid, src);
193194
}

packages/service/common/s3/controller.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { addLog } from '../system/log';
33
import { setCron } from '../system/cron';
44
import { checkTimerLock } from '../system/timerLock/utils';
55
import { TimerIdEnum } from '../system/timerLock/constants';
6+
import path from 'node:path';
67

78
export async function clearExpiredMinioFiles() {
89
try {
@@ -26,6 +27,25 @@ export async function clearExpiredMinioFiles() {
2627

2728
if (bucket) {
2829
await bucket.delete(file.minioKey);
30+
31+
if (!file.minioKey.includes('-parsed/')) {
32+
try {
33+
const dir = path.dirname(file.minioKey);
34+
const basename = path.basename(file.minioKey);
35+
const ext = path.extname(basename);
36+
37+
if (ext) {
38+
const nameWithoutExt = path.basename(basename, ext);
39+
const parsedPrefix = `${dir}/${nameWithoutExt}-parsed`;
40+
41+
await bucket.addDeleteJob({ prefix: parsedPrefix });
42+
addLog.info(`Scheduled deletion of parsed images: ${parsedPrefix}`);
43+
}
44+
} catch (error) {
45+
addLog.debug(`Failed to schedule parsed images deletion for ${file.minioKey}`);
46+
}
47+
}
48+
2949
await MongoS3TTL.deleteOne({ _id: file._id });
3050

3151
success++;

packages/service/common/s3/sources/chat/index.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
ChatFileUploadSchema,
88
DelChatFileByPrefixSchema
99
} from './type';
10+
import { addHours, differenceInHours } from 'date-fns';
1011

1112
class S3ChatSource {
1213
private bucket: S3PrivateBucket;
@@ -61,9 +62,12 @@ class S3ChatSource {
6162
}
6263

6364
/**
 * Create a presigned POST URL for uploading a chat file.
 *
 * The object key is `<S3Sources.chat>/<appId>/<uId>/<chatId>/<id>-<filename>`,
 * where `<id>` comes from `getNanoid(6)`.
 *
 * @param params upload parameters, validated with `ChatFileUploadSchema`
 *   (appId, chatId, uId, filename and an optional `expiredTime`)
 * @returns the result of `bucket.createPostPresignedUrl`
 * @throws when `params` fails `ChatFileUploadSchema.parse`
 */
async createUploadChatFileURL(params: CheckChatFileKeys) {
  const { appId, chatId, uId, filename, expiredTime } = ChatFileUploadSchema.parse(params);
  const rawKey = [S3Sources.chat, appId, uId, chatId, `${getNanoid(6)}-${filename}`].join('/');

  // date-fns computes (first argument - second argument), so the later date
  // must come first to get a positive value for a future `expiredTime`.
  // The result is truncated toward zero, so clamp to 1 hour to avoid passing
  // 0 or a negative lifetime for near or already-past deadlines.
  const expiredHours = expiredTime ? Math.max(1, differenceInHours(expiredTime, new Date())) : 24;

  return await this.bucket.createPostPresignedUrl({ rawKey, filename }, { expiredHours });
}
6872

6973
deleteChatFilesByPrefix(params: DelChatFileByPrefixParams) {

packages/service/common/s3/sources/chat/type.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ export const ChatFileUploadSchema = z.object({
55
appId: ObjectIdSchema,
66
chatId: z.string().nonempty(),
77
uId: z.string().nonempty(),
8-
filename: z.string().nonempty()
8+
filename: z.string().nonempty(),
9+
expiredTime: z.date().optional()
910
});
1011
export type CheckChatFileKeys = z.infer<typeof ChatFileUploadSchema>;
1112

packages/service/common/s3/utils.ts

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,20 @@ import { getNanoid } from '@fastgpt/global/common/string/tools';
1111
import path from 'node:path';
1212
import { randomUUID } from 'node:crypto';
1313
import type { ParsedFileContentS3KeyParams } from './sources/dataset/type';
14-
15-
export function jwtSignS3ObjectKey(objectKey: string) {
14+
import { EndpointUrl } from '@fastgpt/global/common/file/constants';
15+
16+
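/**
17+
* Sign an S3 object key into a JWT and embed the token in a file-access URL.
18+
* @param objectKey S3 object key stored in the token payload
19+
* @param expiredTime moment at which the token expires (lifetime = seconds between now and expiredTime)
20+
* @returns URL of the form `${EndpointUrl}/api/system/file/<token>`
21+
*/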
22+
export function jwtSignS3ObjectKey(objectKey: string, expiredTime: Date) {
  // Token lifetime in seconds, measured from now until expiredTime.
  const ttlSeconds = differenceInSeconds(expiredTime, new Date());
  const signedToken = jwt.sign({ objectKey }, process.env.FILE_TOKEN_KEY as string, {
    expiresIn: ttlSeconds
  });

  // The token is the last path segment of the file endpoint URL.
  return `${EndpointUrl}/api/system/file/${signedToken}`;
}
2329

2430
export function jwtVerifyS3ObjectKey(token: string) {
@@ -95,15 +101,22 @@ export async function uploadImage2S3Bucket(
95101
return uploadKey;
96102
}
97103

104+
/**
 * Extract the file name, without its extension, from a presigned URL.
 *
 * Takes the last segment of the URL path, strips the extension reported by
 * `path.extname`, and percent-decodes what is left. The query string (where
 * presigned signatures live) is ignored because only `pathname` is read.
 *
 * @param presignedURL absolute URL string
 * @returns the decoded base name; if the name contains a malformed percent
 *   sequence it is returned undecoded instead of throwing
 * @throws TypeError when `presignedURL` is not a valid absolute URL
 */
export const getFileNameFromPresignedURL = (presignedURL: string) => {
  const { pathname } = new URL(presignedURL);
  const lastSegment = pathname.split('/').pop() ?? '';
  const stem = path.basename(lastSegment, path.extname(lastSegment));

  try {
    return decodeURIComponent(stem);
  } catch {
    // `new URL()` leaves malformed sequences such as "%zz" untouched and
    // decodeURIComponent rejects them with URIError; fall back to the raw name.
    return stem;
  }
};
110+
98111
export const ParsedFileContentS3Key = {
99112
// 临时的文件路径(比如 evaluation)
100113
temp: (appId: string) => {
101-
return `${S3Sources.chat}/${appId}/temp/${randomUUID()}`;
114+
return `${S3Sources.tmp}/${appId}/temp/${randomUUID()}`;
102115
},
103116

104117
// 对话中上传的文件的解析结果的图片的 Key
105118
chat: ({ appId, chatId, uId }: { chatId: string; uId: string; appId: string }) => {
106-
return `${S3Sources.chat}/${appId}/${uId}/${chatId}/parsed`;
119+
return `${S3Sources.chat}/${appId}/${uId}/${chatId}`;
107120
},
108121

109122
// 上传数据集的文件的解析结果的图片的 Key

packages/service/core/dataset/data/controller.ts

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,14 @@ export const formatDatasetDataValue = ({
5858
};
5959
}
6060

61-
const previewUrl =
62-
getS3DatasetSource().isDatasetObjectKey(imageId) || getS3ChatSource().isChatFileKey(imageId)
63-
? imageId
64-
: getDatasetImagePreviewUrl({
65-
imageId,
66-
teamId,
67-
datasetId,
68-
expiredMinutes: 60 * 24 * 7 // 7 days
69-
});
61+
const previewUrl = getS3DatasetSource().isDatasetObjectKey(imageId)
62+
? imageId
63+
: getDatasetImagePreviewUrl({
64+
imageId,
65+
teamId,
66+
datasetId,
67+
expiredMinutes: 60 * 24 * 7 // 7 days
68+
});
7069

7170
return {
7271
q: `![${q.replaceAll('\n', '')}](${previewUrl})`,

packages/service/core/dataset/search/controller.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ import { datasetSearchQueryExtension } from './utils';
3333
import type { RerankModelItemType } from '@fastgpt/global/core/ai/model.d';
3434
import { formatDatasetDataValue } from '../data/controller';
3535
import { pushTrack } from '../../../common/middle/tracks/utils';
36+
import { replaceDatasetQuoteTextWithJWT } from '../../../core/dataset/utils';
37+
import { addHours } from 'date-fns';
3638

3739
export type SearchDatasetDataProps = {
3840
histories: ChatItemType[];
@@ -53,7 +55,7 @@ export type SearchDatasetDataProps = {
5355
[NodeInputKeyEnum.datasetSearchRerankModel]?: RerankModelItemType;
5456
[NodeInputKeyEnum.datasetSearchRerankWeight]?: number;
5557

56-
/*
58+
/*
5759
{
5860
tags: {
5961
$and: ["str1","str2"],
@@ -230,7 +232,7 @@ export async function searchDatasetData(
230232
};
231233
};
232234

233-
/*
235+
/*
234236
Collection metadata filter
235237
标签过滤:
236238
1. and 先生效
@@ -903,10 +905,15 @@ export async function searchDatasetData(
903905
// token filter
904906
const filterMaxTokensResult = await filterDatasetDataByMaxTokens(scoreFilter, maxTokens);
905907

908+
const finalResult = filterMaxTokensResult.map((item) => {
909+
item.q = replaceDatasetQuoteTextWithJWT(item.q, addHours(new Date(), 1));
910+
return item;
911+
});
912+
906913
pushTrack.datasetSearch({ datasetIds, teamId });
907914

908915
return {
909-
searchRes: filterMaxTokensResult,
916+
searchRes: finalResult,
910917
embeddingTokens,
911918
reRankInputTokens,
912919
searchMode,

packages/service/core/dataset/utils.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,19 @@ export const filterDatasetsByTmbId = async ({
3939
* 替换数据集引用 markdown 文本中的图片链接格式的 S3 对象键为 JWT 签名后的 URL
4040
*
4141
* @param datasetQuoteText 数据集引用文本
42+
* @param expiredTime 过期时间
4243
* @returns 替换后的文本
4344
*
4445
* @example
4546
*
4647
* ```typescript
4748
* const datasetQuoteText = '![image.png](dataset/68fee42e1d416bb5ddc85b19/6901c3071ba2bea567e8d8db/aZos7D-214afce5-4d42-4356-9e05-8164d51c59ae.png)';
48-
* const replacedText = await replaceDatasetQuoteTextWithJWT(datasetQuoteText)
49+
* const replacedText = replaceDatasetQuoteTextWithJWT(datasetQuoteText, addDays(new Date(), 90))
4950
* console.log(replacedText)
5051
* // '![image.png](http://localhost:3000/api/system/file/eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJvYmplY3RLZXkiOiJjaGF0LzY5MWFlMjlkNDA0ZDA0Njg3MTdkZDc0Ny82OGFkODVhNzQ2MzAwNmM5NjM3OTlhMDcvalhmWHk4eWZHQUZzOVdKcGNXUmJBaFYyL3BhcnNlZC85YTBmNGZlZC00ZWRmLTQ2MTMtYThkNi01MzNhZjVhZTUxZGMucG5nIiwiaWF0IjoxNzYzMzcwOTYwLCJleHAiOjk1MzkzNzA5NjB9.tMDWg0-ZWRnWPNp9Hakd0w1hhaO8jj2oD98SU0wAQYQ)'
5152
* ```
5253
*/
53-
export async function replaceDatasetQuoteTextWithJWT(datasetQuoteText: string) {
54+
export function replaceDatasetQuoteTextWithJWT(datasetQuoteText: string, expiredTime: Date) {
5455
if (!datasetQuoteText || typeof datasetQuoteText !== 'string') return datasetQuoteText as string;
5556

5657
const prefixPattern = Object.values(S3Sources)
@@ -67,8 +68,7 @@ export async function replaceDatasetQuoteTextWithJWT(datasetQuoteText: string) {
6768
const [full, bang, alt, objectKey] = match;
6869

6970
if (s3DatasetSource.isDatasetObjectKey(objectKey) || s3ChatSource.isChatFileKey(objectKey)) {
70-
const token = jwtSignS3ObjectKey(objectKey);
71-
const url = `${EndpointUrl}/api/system/file/${token}`;
71+
const url = jwtSignS3ObjectKey(objectKey, expiredTime);
7272
const replacement = `${bang}[${alt}](${url})`;
7373
content =
7474
content.slice(0, match.index) + replacement + content.slice(match.index + full.length);

packages/service/core/workflow/dispatch/ai/chat.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ import { postTextCensor } from '../../../chat/postTextCensor';
4242
import { createLLMResponse } from '../../../ai/llm/request';
4343
import { formatModelChars2Points } from '../../../../support/wallet/usage/utils';
4444
import { replaceDatasetQuoteTextWithJWT } from '../../../dataset/utils';
45-
import { ParsedFileContentS3Key } from '../../../../common/s3/utils';
45+
import { getFileNameFromPresignedURL, ParsedFileContentS3Key } from '../../../../common/s3/utils';
46+
import { addDays } from 'date-fns';
4647

4748
export type ChatProps = ModuleDispatchProps<
4849
AIChatNodeProps & {
@@ -311,7 +312,8 @@ async function filterDatasetQuote({
311312
: '';
312313

313314
return {
314-
datasetQuoteText: await replaceDatasetQuoteTextWithJWT(datasetQuoteText)
315+
// datasetQuoteText: replaceDatasetQuoteTextWithJWT(datasetQuoteText, addDays(new Date(), 90))
316+
datasetQuoteText
315317
};
316318
}
317319

packages/service/core/workflow/dispatch/tools/readFiles.ts

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ import { type ChatItemType, type UserChatItemValueItemType } from '@fastgpt/glob
1313
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
1414
import { addLog } from '../../../../common/system/log';
1515
import { addRawTextBuffer, getRawTextBuffer } from '../../../../common/buffer/rawText/controller';
16-
import { addMinutes } from 'date-fns';
16+
import { addDays, addMinutes } from 'date-fns';
1717
import { getNodeErrResponse } from '../utils';
1818
import { isInternalAddress } from '../../../../common/system/utils';
1919
import { replaceDatasetQuoteTextWithJWT } from '../../../dataset/utils';
20-
import { ParsedFileContentS3Key } from '../../../../common/s3/utils';
20+
import { getFileNameFromPresignedURL, ParsedFileContentS3Key } from '../../../../common/s3/utils';
2121

2222
type Props = ModuleDispatchProps<{
2323
[NodeInputKeyEnum.fileUrlList]: string[];
@@ -65,6 +65,23 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
6565
const filesFromHistories = version !== '489' ? [] : getHistoryFileLinks(histories);
6666

6767
try {
68+
console.dir(
69+
{
70+
urls: [...fileUrlList, ...filesFromHistories],
71+
requestOrigin,
72+
maxFiles,
73+
teamId,
74+
tmbId,
75+
customPdfParse,
76+
usageId,
77+
fileS3Prefix: ParsedFileContentS3Key.chat({
78+
appId: props.runningAppInfo.id,
79+
chatId: props.chatId!,
80+
uId: props.uid
81+
})
82+
},
83+
{ depth: null }
84+
);
6885
const { text, readFilesResult } = await getFileContentFromLinks({
6986
// Concat fileUrlList and filesFromHistories; remove not supported files
7087
urls: [...fileUrlList, ...filesFromHistories],
@@ -241,12 +258,12 @@ export const getFileContentFromLinks = async ({
241258
customPdfParse,
242259
getFormatText: true,
243260
imageKeyOptions: {
244-
prefix: fileS3Prefix
261+
prefix: `${fileS3Prefix}/${getFileNameFromPresignedURL(url)}-parsed`
245262
},
246263
usageId
247264
});
248265

249-
const replacedText = await replaceDatasetQuoteTextWithJWT(rawText);
266+
const replacedText = replaceDatasetQuoteTextWithJWT(rawText, addDays(new Date(), 90));
250267

251268
// Add to buffer
252269
addRawTextBuffer({

0 commit comments

Comments
 (0)