Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@
<artifactId>postgresql</artifactId>
<version>42.7.1</version>
</dependency>
<!-- pgvector Java客户端 -->
<!-- Weaviate Java客户端 -->
<dependency>
<groupId>com.pgvector</groupId>
<artifactId>pgvector</artifactId>
<version>0.1.4</version>
<groupId>io.weaviate</groupId>
<artifactId>client</artifactId>
<version>4.8.0</version>
</dependency>
<!-- LangChain4j 嵌入模型 -->
<dependency>
Expand Down Expand Up @@ -181,7 +181,7 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
<version>3.12.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
Expand Down Expand Up @@ -235,14 +235,6 @@
<artifactId>langchain4j-reactor</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<!--向量数据库-->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-pgvector</artifactId>
<version>${langchain4j.version}</version>
</dependency>


<!-- 工程模块 -->
<dependency>
<groupId>com.achobeta</groupId>
Expand Down
6 changes: 3 additions & 3 deletions refine-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
</dependency>
<!--向量数据库-->
<!-- Weaviate客户端 -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-pgvector</artifactId>
<groupId>io.weaviate</groupId>
<artifactId>client</artifactId>
</dependency>

<!-- 工程模块;启动依赖 trigger->domain, infrastructure-->
Expand Down
161 changes: 33 additions & 128 deletions refine-app/src/main/java/com/achobeta/config/RagConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import dev.langchain4j.store.embedding.filter.Filter;
import dev.langchain4j.store.embedding.filter.MetadataFilterBuilder;
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
import jakarta.annotation.PostConstruct;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -24,10 +23,6 @@
import org.springframework.context.annotation.Configuration;

import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
import java.util.stream.Collectors;

Expand All @@ -46,21 +41,22 @@ public class RagConfig {

@PostConstruct
public void initialize() {
this.embeddingStore = PgVectorEmbeddingStore.builder()
.host(ragConfigProperties.getHost())
.port(ragConfigProperties.getPort())
.database(ragConfigProperties.getDatabase())
.user(ragConfigProperties.getUser())
.password(ragConfigProperties.getPassword())
.table("knowledge_embeddings")
.dimension(qwenEmbeddingModel.dimension())
.build();
log.info("EmbeddingStore初始化完成");
try {
log.info("开始初始化Weaviate EmbeddingStore");

// 暂时跳过EmbeddingStore初始化,因为我们使用自定义的向量存储
log.info("RagConfig初始化完成,使用自定义向量存储服务");

} catch (Exception e) {
log.error("RagConfig初始化失败", e);
}
}

@Bean
public EmbeddingStore<TextSegment> embeddingStore() {
return this.embeddingStore;
// 返回null,因为我们使用自定义的向量存储服务
log.info("使用自定义向量存储服务,不创建EmbeddingStore Bean");
return null;
}

@Bean
Expand All @@ -75,29 +71,8 @@ public ContentRetriever contentRetriever() {
// 获取目录下的文件(排除子目录,只考虑文件)
File[] files = docDir.listFiles(File::isFile);
if (files != null && files.length > 0) {
// 先清空向量表的所有旧数据
truncateEmbeddingTable(); // 全删逻辑(替换原来的按文件名删)

// 加载文档并重新入库
List<Document> documents = FileSystemDocumentLoader.loadDocuments(docPath);
log.info("待录入文档总数:{} 个,开始分段并生成向量", documents.size());


// 文档分段
DocumentByParagraphSplitter documentSplitter = new DocumentByParagraphSplitter(800, 350);
// 自定义文档加载器,把文档转换成向量并存储到向量数据库中
EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
.documentSplitter(documentSplitter)
.textSegmentTransformer(textSegment -> TextSegment.from(
textSegment.metadata().getString("file_name") + "\n" + textSegment.text(),
textSegment.metadata()
))
.embeddingModel(qwenEmbeddingModel)
.embeddingStore(embeddingStore)
.build();
// 加载文档
ingestor.ingest(documents);
log.info("文档全量入库完成! 共加载 {} 个文件,已清空旧数据,当前表中为最新全量数据", files.length);
// TODO: 使用Weaviate实现文档向量化和存储
log.warn("RagConfig暂未实现Weaviate文档向量化,跳过文档加载");
} else {
log.info("文档目录 {} 下无可用文件,跳过文档加载(表中数据保持不变)", docPath);
}
Expand All @@ -109,100 +84,30 @@ public ContentRetriever contentRetriever() {
return new ContentRetriever() {
@Override
public List<Content> retrieve(Query query) {
// 1. 从查询元数据中动态提取业务传入的学科(target)
Object obj = query.metadata().invocationContext().chatMemoryId();
if (null == obj) {
log.warn("RAG检索 - 未传递 file_name 筛选条件,返回空结果");
try {
log.info("ContentRetriever收到查询请求: {}", query.text());

// 暂时返回空结果,因为我们主要关注学习行为记录
// 后续可以集成Weaviate进行内容检索
log.info("ContentRetriever暂时返回空结果,专注于学习行为记录功能");
return List.of();

} catch (Exception e) {
log.error("ContentRetriever查询失败", e);
return List.of();
}
String target = String.valueOf(obj);
log.info("RAG检索 - 动态过滤 file_name(模糊匹配): {}", target);

// 2. 生成查询向量
Embedding embeddedQuery = qwenEmbeddingModel.embed(query.text()).content();

// 3. 用自定义的Filter构建动态过滤条件
//Filter filter = MetadataFilterBuilder.metadataKey("subject").isEqualTo(subject);

//模糊匹配,自定义的Filter构建动态过滤条件
Filter filter = MetadataFilterBuilder.metadataKey("file_name").containsString(target);

// 4. 执行带动态过滤的检索
EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(embeddedQuery)
.maxResults(5) // 最多返回5条
.minScore(0.75) // 相关度阈值
.filter(filter) // 动态过滤条件
.build();

EmbeddingSearchResult<TextSegment> searchResult = embeddingStore.search(searchRequest);
log.info("RAG检索 - 匹配到 {} 条结果", searchResult.matches().size());
// 5. 转换结果为Content列表
return searchResult.matches().stream()
.map(match -> Content.from(match.embedded()))
.collect(Collectors.toList());
}
};
}

private void truncateEmbeddingTable() {
// 方案1:TRUNCATE(推荐)- 快速清空表,不记录单行删除日志,效率极高
String sql = String.format("TRUNCATE TABLE %s CASCADE", "knowledge_embeddings");
// CASCADE:若表有外键关联,同时清空关联表(无外键可去掉CASCADE)
// 注释掉PgVector相关的表操作方法
// private void truncateEmbeddingTable() {
// // TODO: 使用Weaviate实现数据清理
// log.warn("RagConfig暂未实现Weaviate数据清理");
// }

// 方案2:DELETE(备选,无TRUNCATE权限时使用)
// String sql = String.format("DELETE FROM %s", VECTOR_TABLE);

try (
// 复用配置类的连接信息,避免硬编码
Connection conn = DriverManager.getConnection(
String.format("jdbc:postgresql://%s:%d/%s",
ragConfigProperties.getHost(),
ragConfigProperties.getPort(),
ragConfigProperties.getDatabase()),
ragConfigProperties.getUser(),
ragConfigProperties.getPassword()
);
PreparedStatement pstmt = conn.prepareStatement(sql)
) {
log.info("开始清空向量表:{}", "knowledge_embeddings");
pstmt.executeUpdate();
log.info("向量表 {} 清空成功!", "knowledge_embeddings");

} catch (SQLException e) {
// 若TRUNCATE失败(如无权限),自动降级为DELETE重试(可选逻辑)
if (e.getMessage().contains("permission denied for table") || e.getMessage().contains("TRUNCATE")) {
log.warn("TRUNCATE权限不足,尝试用DELETE清空表", e);
deleteAllEmbeddingData(); // 调用DELETE全删方法
return;
}

log.error("清空向量表 {} 失败", "knowledge_embeddings", e);
throw new RuntimeException("清空旧数据异常,终止文档入库", e);
}
}

/**
* 备选方法:用DELETE清空全表(无TRUNCATE权限时触发)
*/
private void deleteAllEmbeddingData() {
String sql = String.format("DELETE FROM %s", "knowledge_embeddings");
try (
Connection conn = DriverManager.getConnection(
String.format("jdbc:postgresql://%s:%d/%s",
ragConfigProperties.getHost(),
ragConfigProperties.getPort(),
ragConfigProperties.getDatabase()),
ragConfigProperties.getUser(),
ragConfigProperties.getPassword()
);
PreparedStatement pstmt = conn.prepareStatement(sql)
) {
int deletedRows = pstmt.executeUpdate();
log.info("用DELETE清空表 {} 成功,共删除 {} 条记录", "knowledge_embeddings", deletedRows);
} catch (SQLException e) {
log.error("DELETE清空表 {} 失败", "knowledge_embeddings", e);
throw new RuntimeException("删除旧数据异常,终止文档入库", e);
}
}
// private void deleteAllEmbeddingData() {
// // TODO: 使用Weaviate实现数据删除
// log.warn("RagConfig暂未实现Weaviate数据删除");
// }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@
import org.springframework.boot.context.properties.ConfigurationProperties;

@Data
@ConfigurationProperties(prefix = "pgvector.config", ignoreInvalidFields = true)
@ConfigurationProperties(prefix = "weaviate.config", ignoreInvalidFields = true)
public class RagConfigProperties {

/** host:ip */
private String host;
/** 端口 */
private int port;
/** 数据库名 */
private String database;
/** 用户名 */
private String user;
/** 账密 */
private String password;
/** 向量维度 */
//private String vectorDimension;
/** scheme */
private String scheme;
/** api-key */
private String apiKey;
/** timeout */
private int timeout;
/** class-name */
private String className;

}
86 changes: 86 additions & 0 deletions refine-app/src/main/java/com/achobeta/config/WeaviateConfig.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package com.achobeta.config;

import io.weaviate.client.Config;
import io.weaviate.client.WeaviateAuthClient;
import io.weaviate.client.WeaviateClient;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
* @Auth : Malog
* @Desc : Weaviate客户端配置
* @Time : 2025/11/28
*/
@Slf4j
@Configuration
public class WeaviateConfig {

@Value("${weaviate.config.host:localhost}")
private String host;

@Value("${weaviate.config.port:8080}")
private int port;

@Value("${weaviate.config.scheme:http}")
private String scheme;

@Value("${weaviate.config.api-key:}")
private String apiKey;

@Value("${weaviate.config.timeout:30000}")
private int timeout;

@Value("${weaviate.config.class-name:LearningVector}")
private String className;

/**
* 创建Weaviate客户端Bean
*/
@Bean
public WeaviateClient weaviateClient() {
try {
// 构建连接URL - 对于云服务,不需要端口号
String url;
Config config;

if (port == 443 && "https".equals(scheme)) {
// Weaviate Cloud服务,不需要端口号
url = scheme + "://" + host;
config = new Config(scheme, host);
} else {
// 本地服务,需要端口号
url = scheme + "://" + host + ":" + port;
config = new Config(scheme, host + ":" + port);
}

WeaviateClient client;

// 如果配置了API Key,使用认证客户端
if (apiKey != null && !apiKey.trim().isEmpty()) {
log.info("使用API Key认证连接Weaviate");
client = WeaviateAuthClient.apiKey(config, apiKey);
} else {
// 无认证连接
log.info("使用无认证方式连接Weaviate");
client = new WeaviateClient(config);
}

log.info("Weaviate客户端初始化成功,连接地址: {}", url);
return client;

} catch (Exception e) {
log.error("Weaviate客户端初始化失败", e);
throw new RuntimeException("Failed to initialize Weaviate client", e);
}
}

/**
* 获取Weaviate类名配置
*/
@Bean
public String weaviateClassName() {
return className;
}
}
14 changes: 8 additions & 6 deletions refine-app/src/main/resources/application-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,15 @@ redis:
ping-interval: 60000
keep-alive: true

pgvector:
# Weaviate 向量数据库配置
weaviate:
config:
host: ${PGVECTOR_IP:postgres}
port: 5432
database: postgres
user: root
password: 123456
host: ${WEAVIATE_HOST:fmamdtzprxs1hi3olvplxg.c0.asia-southeast1.gcp.weaviate.cloud}
port: ${WEAVIATE_PORT:443}
scheme: ${WEAVIATE_SCHEME:https}
api-key: ${WEAVIATE_API_KEY:YWtxam9aYkxzbFBkdWlCdV9IOThJWHdHdGNHSUplTE9VeERtSWk4a216emN3Z3pXVnUzNVZKSG9JeEFZPV92MjAw} # Weaviate Cloud API Key
timeout: 30000 # 连接超时时间(毫秒)
class-name: LearningVector # Weaviate中的类名

# MyBatis 配置【如需使用记得打开】
mybatis:
Expand Down
Loading
Loading