Skip to content

Commit 1781385

Browse files
committed
feat: halfvec v1
1 parent db2c0a0 commit 1781385

File tree

3 files changed

+121
-6
lines changed

3 files changed

+121
-6
lines changed

packages/service/common/vectorStore/pg/class.ts

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,24 @@ export class PgVectorCtrl {
2121
CREATE EXTENSION IF NOT EXISTS vector;
2222
CREATE TABLE IF NOT EXISTS ${DatasetVectorTableName} (
2323
id BIGSERIAL PRIMARY KEY,
24-
vector VECTOR(1536) NOT NULL,
24+
halfvector HALFVEC(1536) NOT NULL,
2525
team_id VARCHAR(50) NOT NULL,
2626
dataset_id VARCHAR(50) NOT NULL,
2727
collection_id VARCHAR(50) NOT NULL,
2828
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP
2929
);
3030
`);
3131

32-
await PgClient.query(
33-
`CREATE INDEX CONCURRENTLY IF NOT EXISTS vector_index ON ${DatasetVectorTableName} USING hnsw (vector vector_ip_ops) WITH (m = 32, ef_construction = 128);`
34-
);
3532
await PgClient.query(
3633
`CREATE INDEX CONCURRENTLY IF NOT EXISTS team_dataset_collection_index ON ${DatasetVectorTableName} USING btree(team_id, dataset_id, collection_id);`
3734
);
3835
await PgClient.query(
3936
`CREATE INDEX CONCURRENTLY IF NOT EXISTS create_time_index ON ${DatasetVectorTableName} USING btree(createtime);`
4037
);
38+
// TODO: enable halfvector index
39+
// await PgClient.query(
40+
// `CREATE INDEX CONCURRENTLY IF NOT EXISTS halfvector_index ON ${DatasetVectorTableName} USING hnsw (halfvector halfvec_ip_ops) WITH (m = 32, ef_construction = 128);`
41+
// );
4142

4243
addLog.info('init pg successful');
4344
} catch (error) {
@@ -48,10 +49,12 @@ export class PgVectorCtrl {
4849
const { teamId, datasetId, collectionId, vector, retry = 3 } = props;
4950

5051
try {
52+
// TODO: remove vector
5153
const { rowCount, rows } = await PgClient.insert(DatasetVectorTableName, {
5254
values: [
5355
[
5456
{ key: 'vector', value: `[${vector}]` },
57+
{ key: 'halfvector', value: `[${vector}]` },
5558
{ key: 'team_id', value: String(teamId) },
5659
{ key: 'dataset_id', value: String(datasetId) },
5760
{ key: 'collection_id', value: String(collectionId) }
@@ -177,12 +180,27 @@ export class PgVectorCtrl {
177180
// );
178181
// console.log(explan[2].rows);
179182

183+
// TODO: use halfvector
184+
// const results: any = await PgClient.query(
185+
// `
186+
// BEGIN;
187+
// SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
188+
// select id, collection_id, halfvector <#> '[${vector}]' AS score
189+
// from ${DatasetVectorTableName}
190+
// where team_id='${teamId}'
191+
// AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
192+
// ${filterCollectionIdSql}
193+
// ${forbidCollectionSql}
194+
// order by score limit ${limit};
195+
// COMMIT;`
196+
// );
197+
180198
const results: any = await PgClient.query(
181199
`
182200
BEGIN;
183201
SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
184-
select id, collection_id, vector <#> '[${vector}]' AS score
185-
from ${DatasetVectorTableName}
202+
select id, collection_id, vector <#> '[${vector}]' AS score
203+
from ${DatasetVectorTableName}
186204
where team_id='${teamId}'
187205
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
188206
${filterCollectionIdSql}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import type { NextApiRequest, NextApiResponse } from 'next';
2+
import { jsonRes } from '@fastgpt/service/common/response';
3+
import { connectToDatabase } from '@/service/mongo';
4+
import { authCert } from '@fastgpt/service/support/permission/auth/common';
5+
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
6+
import { DatasetVectorTableName } from '@fastgpt/service/common/vectorStore/constants';
7+
8+
/**
 * Admin migration endpoint: makes sure the `halfvector` column exists on the vector table and
 * backfills it from the existing `vector` column in batches of 1000 rows.
 *
 * The request must pass `authCert` with `authRoot: true`. Responds with `{ message: 'success' }`
 * once a batch updates zero rows (no row with a NULL `halfvector` is left), or with a
 * `code: 500` payload carrying the caught error.
 *
 * @param req - Incoming Next.js API request; only used for authentication.
 * @param res - Next.js API response the JSON result is written to.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await connectToDatabase();
    await authCert({ req, authRoot: true });

    // Create the new pg column: halfvector
    const columnExists = await PgClient.query(`
      SELECT column_name
      FROM information_schema.columns
      WHERE table_name='${DatasetVectorTableName}' AND column_name='halfvector';
    `);

    if (columnExists.rows.length === 0) {
      // A single ALTER statement is already atomic, so it is sent without an explicit
      // BEGIN/COMMIT: when a statement inside an explicit transaction block fails, the COMMIT
      // is never reached and the connection is left in an aborted transaction.
      // IF NOT EXISTS keeps the call safe when two requests race past the check above.
      await PgClient.query(
        `ALTER TABLE ${DatasetVectorTableName} ADD COLUMN IF NOT EXISTS halfvector halfvec(1536);`
      );
      console.log('halfvector column added');
    }

    // Pause between batches so the backfill does not monopolise the database.
    const batchPauseMs = 100;
    let updatedCount = 0;

    do {
      const batch = await PgClient.query(`
        WITH updated AS (
          UPDATE ${DatasetVectorTableName}
          SET halfvector = vector::halfvec(1536)
          WHERE id IN (
            SELECT id
            FROM ${DatasetVectorTableName}
            WHERE halfvector IS NULL
            LIMIT 1000
          )
          RETURNING 1
        )
        SELECT count(*) FROM updated;
      `);

      // count(*) is a bigint, which the driver may hand back as a string; convert it
      // explicitly instead of relying on implicit coercion in the comparisons below.
      updatedCount = Number(batch.rows[0].count);
      console.log('rowsUpdated:', updatedCount);

      // Sleep briefly after every batch that changed rows
      if (updatedCount > 0) {
        await new Promise((resolve) => setTimeout(resolve, batchPauseMs));
      }
    } while (updatedCount > 0);

    jsonRes(res, {
      message: 'success'
    });
  } catch (error) {
    console.log(error);

    jsonRes(res, {
      code: 500,
      error
    });
  }
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import type { NextApiRequest, NextApiResponse } from 'next';
2+
import { jsonRes } from '@fastgpt/service/common/response';
3+
import { connectToDatabase } from '@/service/mongo';
4+
import { authCert } from '@fastgpt/service/support/permission/auth/common';
5+
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
6+
import { DatasetVectorTableName } from '@fastgpt/service/common/vectorStore/constants';
7+
8+
/**
 * Admin migration endpoint that finalises the halfvector switch: marks `halfvector` as
 * NOT NULL, drops the legacy `vector_index` and then drops the legacy `vector` column.
 *
 * The request must pass `authCert` with `authRoot: true`. Responds with `{ message: 'success' }`
 * on success, or with a `code: 500` payload carrying the caught error.
 *
 * @param req - Incoming Next.js API request; only used for authentication.
 * @param res - Next.js API response the JSON result is written to.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await connectToDatabase();
    await authCert({ req, authRoot: true });

    // Make the halfvector column NOT NULL, then remove the legacy index and column.
    // SET NOT NULL runs first on purpose: it fails while any row still has a NULL halfvector,
    // which stops the destructive DROP COLUMN from running before the backfill is complete.
    //
    // The statements are sent as one multi-statement query WITHOUT explicit BEGIN/COMMIT.
    // PostgreSQL executes such a string as a single implicit transaction, so the change is
    // still all-or-nothing, but a failure rolls back automatically. With an explicit BEGIN,
    // a failing statement skips the COMMIT and leaves the connection stuck in an aborted
    // transaction block.
    // NOTE(review): assumes PgClient.query forwards the text via the simple query protocol,
    // which the original multi-statement string already relied on - confirm.
    await PgClient.query(
      `ALTER TABLE ${DatasetVectorTableName} ALTER COLUMN halfvector SET NOT NULL;
      DROP INDEX IF EXISTS vector_index;
      ALTER TABLE ${DatasetVectorTableName} DROP COLUMN IF EXISTS vector;
      `
    );

    jsonRes(res, {
      message: 'success'
    });
  } catch (error) {
    console.log(error);

    jsonRes(res, {
      code: 500,
      error
    });
  }
}

0 commit comments

Comments
 (0)