diff --git a/pom.xml b/pom.xml index bdebeaf..29ce182 100644 --- a/pom.xml +++ b/pom.xml @@ -76,11 +76,11 @@ postgresql 42.7.1 - + - com.pgvector - pgvector - 0.1.4 + io.weaviate + client + 4.8.0 @@ -181,7 +181,7 @@ org.apache.commons commons-lang3 - 3.9 + 3.12.0 com.google.guava @@ -235,14 +235,6 @@ langchain4j-reactor ${langchain4j.version} - - - dev.langchain4j - langchain4j-pgvector - ${langchain4j.version} - - - com.achobeta diff --git a/refine-app/pom.xml b/refine-app/pom.xml index 94e2d03..02bf62e 100644 --- a/refine-app/pom.xml +++ b/refine-app/pom.xml @@ -113,10 +113,10 @@ dev.langchain4j langchain4j - + - dev.langchain4j - langchain4j-pgvector + io.weaviate + client diff --git a/refine-app/src/main/java/com/achobeta/config/RagConfig.java b/refine-app/src/main/java/com/achobeta/config/RagConfig.java index 4e72263..6cd9ebf 100644 --- a/refine-app/src/main/java/com/achobeta/config/RagConfig.java +++ b/refine-app/src/main/java/com/achobeta/config/RagConfig.java @@ -15,7 +15,6 @@ import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; import dev.langchain4j.store.embedding.filter.Filter; import dev.langchain4j.store.embedding.filter.MetadataFilterBuilder; -import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; import jakarta.annotation.PostConstruct; import jakarta.annotation.Resource; import lombok.extern.slf4j.Slf4j; @@ -24,10 +23,6 @@ import org.springframework.context.annotation.Configuration; import java.io.File; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; import java.util.List; import java.util.stream.Collectors; @@ -46,21 +41,22 @@ public class RagConfig { @PostConstruct public void initialize() { - this.embeddingStore = PgVectorEmbeddingStore.builder() - .host(ragConfigProperties.getHost()) - .port(ragConfigProperties.getPort()) - .database(ragConfigProperties.getDatabase()) - .user(ragConfigProperties.getUser()) - .password(ragConfigProperties.getPassword()) - .table("knowledge_embeddings") - .dimension(qwenEmbeddingModel.dimension()) - .build(); - log.info("EmbeddingStore初始化完成"); + try { + log.info("开始初始化Weaviate EmbeddingStore"); + + // 暂时跳过EmbeddingStore初始化,因为我们使用自定义的向量存储 + log.info("RagConfig初始化完成,使用自定义向量存储服务"); + + } catch (Exception e) { + log.error("RagConfig初始化失败", e); + } } @Bean public EmbeddingStore embeddingStore() { - return this.embeddingStore; + // 返回null,因为我们使用自定义的向量存储服务 + log.info("使用自定义向量存储服务,不创建EmbeddingStore Bean"); + return null; } @Bean @@ -75,29 +71,8 @@ public ContentRetriever contentRetriever() { // 获取目录下的文件(排除子目录,只考虑文件) File[] files = docDir.listFiles(File::isFile); if (files != null && files.length > 0) { - // 先清空向量表的所有旧数据 - truncateEmbeddingTable(); // 全删逻辑(替换原来的按文件名删) - - // 加载文档并重新入库 - List documents = FileSystemDocumentLoader.loadDocuments(docPath); - log.info("待录入文档总数:{} 个,开始分段并生成向量", documents.size()); - - - // 文档分段 - DocumentByParagraphSplitter documentSplitter = new DocumentByParagraphSplitter(800, 350); - // 自定义文档加载器,把文档转换成向量并存储到向量数据库中 - EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() - .documentSplitter(documentSplitter) - .textSegmentTransformer(textSegment -> TextSegment.from( - textSegment.metadata().getString("file_name") + "\n" + textSegment.text(), - textSegment.metadata() - )) - .embeddingModel(qwenEmbeddingModel) - .embeddingStore(embeddingStore) - .build(); - // 加载文档 - ingestor.ingest(documents); - log.info("文档全量入库完成! 共加载 {} 个文件,已清空旧数据,当前表中为最新全量数据", files.length); + // TODO: 使用Weaviate实现文档向量化和存储 + log.warn("RagConfig暂未实现Weaviate文档向量化,跳过文档加载"); } else { log.info("文档目录 {} 下无可用文件,跳过文档加载(表中数据保持不变)", docPath); } @@ -109,100 +84,30 @@ public ContentRetriever contentRetriever() { return new ContentRetriever() { @Override public List retrieve(Query query) { - // 1. 从查询元数据中动态提取业务传入的学科(target) - Object obj = query.metadata().invocationContext().chatMemoryId(); - if (null == obj) { - log.warn("RAG检索 - 未传递 file_name 筛选条件,返回空结果"); + try { + log.info("ContentRetriever收到查询请求: {}", query.text()); + + // 暂时返回空结果,因为我们主要关注学习行为记录 + // 后续可以集成Weaviate进行内容检索 + log.info("ContentRetriever暂时返回空结果,专注于学习行为记录功能"); + return List.of(); + + } catch (Exception e) { + log.error("ContentRetriever查询失败", e); return List.of(); } - String target = String.valueOf(obj); - log.info("RAG检索 - 动态过滤 file_name(模糊匹配): {}", target); - - // 2. 生成查询向量 - Embedding embeddedQuery = qwenEmbeddingModel.embed(query.text()).content(); - - // 3. 用自定义的Filter构建动态过滤条件 - //Filter filter = MetadataFilterBuilder.metadataKey("subject").isEqualTo(subject); - - //模糊匹配,自定义的Filter构建动态过滤条件 - Filter filter = MetadataFilterBuilder.metadataKey("file_name").containsString(target); - - // 4. 执行带动态过滤的检索 - EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder() - .queryEmbedding(embeddedQuery) - .maxResults(5) // 最多返回5条 - .minScore(0.75) // 相关度阈值 - .filter(filter) // 动态过滤条件 - .build(); - - EmbeddingSearchResult searchResult = embeddingStore.search(searchRequest); - log.info("RAG检索 - 匹配到 {} 条结果", searchResult.matches().size()); - // 5. 转换结果为Content列表 - return searchResult.matches().stream() - .map(match -> Content.from(match.embedded())) - .collect(Collectors.toList()); } }; } - private void truncateEmbeddingTable() { - // 方案1:TRUNCATE(推荐)- 快速清空表,不记录单行删除日志,效率极高 - String sql = String.format("TRUNCATE TABLE %s CASCADE", "knowledge_embeddings"); - // CASCADE:若表有外键关联,同时清空关联表(无外键可去掉CASCADE) + // 注释掉PgVector相关的表操作方法 + // private void truncateEmbeddingTable() { + // // TODO: 使用Weaviate实现数据清理 + // log.warn("RagConfig暂未实现Weaviate数据清理"); + // } - // 方案2:DELETE(备选,无TRUNCATE权限时使用) - // String sql = String.format("DELETE FROM %s", VECTOR_TABLE); - - try ( - // 复用配置类的连接信息,避免硬编码 - Connection conn = DriverManager.getConnection( - String.format("jdbc:postgresql://%s:%d/%s", - ragConfigProperties.getHost(), - ragConfigProperties.getPort(), - ragConfigProperties.getDatabase()), - ragConfigProperties.getUser(), - ragConfigProperties.getPassword() - ); - PreparedStatement pstmt = conn.prepareStatement(sql) - ) { - log.info("开始清空向量表:{}", "knowledge_embeddings"); - pstmt.executeUpdate(); - log.info("向量表 {} 清空成功!", "knowledge_embeddings"); - - } catch (SQLException e) { - // 若TRUNCATE失败(如无权限),自动降级为DELETE重试(可选逻辑) - if (e.getMessage().contains("permission denied for table") || e.getMessage().contains("TRUNCATE")) { - log.warn("TRUNCATE权限不足,尝试用DELETE清空表", e); - deleteAllEmbeddingData(); // 调用DELETE全删方法 - return; - } - - log.error("清空向量表 {} 失败", "knowledge_embeddings", e); - throw new RuntimeException("清空旧数据异常,终止文档入库", e); - } - } - - /** - * 备选方法:用DELETE清空全表(无TRUNCATE权限时触发) - */ - private void deleteAllEmbeddingData() { - String sql = String.format("DELETE FROM %s", "knowledge_embeddings"); - try ( - Connection conn = DriverManager.getConnection( - String.format("jdbc:postgresql://%s:%d/%s", - ragConfigProperties.getHost(), - ragConfigProperties.getPort(), - ragConfigProperties.getDatabase()), - ragConfigProperties.getUser(), - ragConfigProperties.getPassword() - ); - PreparedStatement pstmt = conn.prepareStatement(sql) - ) { - int deletedRows = pstmt.executeUpdate(); - log.info("用DELETE清空表 {} 成功,共删除 {} 条记录", "knowledge_embeddings", deletedRows); - } catch (SQLException e) { - log.error("DELETE清空表 {} 失败", "knowledge_embeddings", e); - throw new RuntimeException("删除旧数据异常,终止文档入库", e); - } - } + // private void deleteAllEmbeddingData() { + // // TODO: 使用Weaviate实现数据删除 + // log.warn("RagConfig暂未实现Weaviate数据删除"); + // } } \ No newline at end of file diff --git a/refine-app/src/main/java/com/achobeta/config/RagConfigProperties.java b/refine-app/src/main/java/com/achobeta/config/RagConfigProperties.java index 6c84c24..1701596 100644 --- a/refine-app/src/main/java/com/achobeta/config/RagConfigProperties.java +++ b/refine-app/src/main/java/com/achobeta/config/RagConfigProperties.java @@ -4,20 +4,20 @@ import org.springframework.boot.context.properties.ConfigurationProperties; @Data -@ConfigurationProperties(prefix = "pgvector.config", ignoreInvalidFields = true) +@ConfigurationProperties(prefix = "weaviate.config", ignoreInvalidFields = true) public class RagConfigProperties { /** host:ip */ private String host; /** 端口 */ private int port; - /** 数据库名 */ - private String database; - /** 用户名 */ - private String user; - /** 账密 */ - private String password; - /** 向量维度 */ - //private String vectorDimension; + /** scheme */ + private String scheme; + /** api-key */ + private String apiKey; + /** timeout */ + private int timeout; + /** class-name */ + private String className; } diff --git a/refine-app/src/main/java/com/achobeta/config/WeaviateConfig.java b/refine-app/src/main/java/com/achobeta/config/WeaviateConfig.java new file mode 100644 index 0000000..a46d72c --- /dev/null +++ b/refine-app/src/main/java/com/achobeta/config/WeaviateConfig.java @@ -0,0 +1,86 @@ +package com.achobeta.config; + +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateAuthClient; +import io.weaviate.client.WeaviateClient; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * @Auth : Malog + * @Desc : Weaviate客户端配置 + * @Time : 2025/11/28 + */ +@Slf4j +@Configuration +public class WeaviateConfig { + + @Value("${weaviate.config.host:localhost}") + private String host; + + @Value("${weaviate.config.port:8080}") + private int port; + + @Value("${weaviate.config.scheme:http}") + private String scheme; + + @Value("${weaviate.config.api-key:}") + private String apiKey; + + @Value("${weaviate.config.timeout:30000}") + private int timeout; + + @Value("${weaviate.config.class-name:LearningVector}") + private String className; + + /** + * 创建Weaviate客户端Bean + */ + @Bean + public WeaviateClient weaviateClient() { + try { + // 构建连接URL - 对于云服务,不需要端口号 + String url; + Config config; + + if (port == 443 && "https".equals(scheme)) { + // Weaviate Cloud服务,不需要端口号 + url = scheme + "://" + host; + config = new Config(scheme, host); + } else { + // 本地服务,需要端口号 + url = scheme + "://" + host + ":" + port; + config = new Config(scheme, host + ":" + port); + } + + WeaviateClient client; + + // 如果配置了API Key,使用认证客户端 + if (apiKey != null && !apiKey.trim().isEmpty()) { + log.info("使用API Key认证连接Weaviate"); + client = WeaviateAuthClient.apiKey(config, apiKey); + } else { + // 无认证连接 + log.info("使用无认证方式连接Weaviate"); + client = new WeaviateClient(config); + } + + log.info("Weaviate客户端初始化成功,连接地址: {}", url); + return client; + + } catch (Exception e) { + log.error("Weaviate客户端初始化失败", e); + throw new RuntimeException("Failed to initialize Weaviate client", e); + } + } + + /** + * 获取Weaviate类名配置 + */ + @Bean + public String weaviateClassName() { + return className; + } +} \ No newline at end of file diff --git a/refine-app/src/main/resources/application-dev.yml b/refine-app/src/main/resources/application-dev.yml index a6fe14b..729ff98 100644 --- a/refine-app/src/main/resources/application-dev.yml +++ b/refine-app/src/main/resources/application-dev.yml @@ -125,13 +125,15 @@ redis: ping-interval: 60000 keep-alive: true -pgvector: +# Weaviate 向量数据库配置 +weaviate: config: - host: ${PGVECTOR_IP:postgres} - port: 5432 - database: postgres - user: root - password: 123456 + host: ${WEAVIATE_HOST:fmamdtzprxs1hi3olvplxg.c0.asia-southeast1.gcp.weaviate.cloud} + port: ${WEAVIATE_PORT:443} + scheme: ${WEAVIATE_SCHEME:https} + api-key: ${WEAVIATE_API_KEY:YWtxam9aYkxzbFBkdWlCdV9IOThJWHdHdGNHSUplTE9VeERtSWk4a216emN3Z3pXVnUzNVZKSG9JeEFZPV92MjAw} # Weaviate Cloud API Key + timeout: 30000 # 连接超时时间(毫秒) + class-name: LearningVector # Weaviate中的类名 # MyBatis 配置【如需使用记得打开】 mybatis: diff --git a/refine-app/src/main/resources/mybatis/mapper/VectorMapper.xml b/refine-app/src/main/resources/mybatis/mapper/VectorMapper.xml deleted file mode 100644 index 43e0953..0000000 --- a/refine-app/src/main/resources/mybatis/mapper/VectorMapper.xml +++ /dev/null @@ -1,112 +0,0 @@ - - - - - - - INSERT INTO user_learning_vectors - (user_id, question_id, action_type, question_content, subject, knowledge_point_id) - VALUES - (#{userId}, #{questionId}, #{actionType}, #{questionContent}, #{subject}, #{knowledgePointId}) - - - - - - - - - - - - - - INSERT INTO learning_insights - (user_id, insight_type, title, description, confidence_score) - VALUES - (#{userId}, #{insight.type}, #{insight.title}, #{insight.description}, #{insight.confidenceScore}) - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/refine-domain/src/main/java/com/achobeta/domain/ocr/service/impl/DefaultOcrService.java b/refine-domain/src/main/java/com/achobeta/domain/ocr/service/impl/DefaultOcrService.java index 08f4a33..069cfdd 100644 --- a/refine-domain/src/main/java/com/achobeta/domain/ocr/service/impl/DefaultOcrService.java +++ b/refine-domain/src/main/java/com/achobeta/domain/ocr/service/impl/DefaultOcrService.java @@ -7,10 +7,12 @@ import com.achobeta.domain.ocr.model.entity.QuestionEntity; import com.achobeta.domain.ocr.service.IOcrService; import com.achobeta.domain.rag.service.IVectorService; -import com.achobeta.types.enums.ActionType; +import com.achobeta.domain.user.event.UserUploadEvent; import com.achobeta.types.exception.AppException; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.ApplicationEventPublisher; import org.springframework.stereotype.Service; import java.io.IOException; @@ -29,6 +31,9 @@ public class DefaultOcrService implements IOcrService { private final IFilePreprocessPort filePreprocessPort; private final IOcrPort ocrPort; private final IAiService aiService; + private final ApplicationEventPublisher eventPublisher; + + @Qualifier("weaviateVectorRepository") private final IVectorService vectorService; /** @@ -122,26 +127,21 @@ else if (lowerType.endsWith(".txt") || "txt".equals(lowerType)) { questionEntity.setQuestionId(uuid); questionEntity.setUserId(userId); - // 存储学习行为向量到向量数据库 + // 发布用户上传事件,由事件监听器处理向量存储 try { - // 调用向量服务存储学习向量,行为类型为"upload"表示用户上传题目 - boolean vectorStored = vectorService.storeLearningVector( + UserUploadEvent uploadEvent = new UserUploadEvent( + this, userId, uuid, recognizedText, - ActionType.UPLOAD.getActionType(), - null, - null + null, // subject 暂时为空,可以从AI分析中获取 + null // knowledgePointId 暂时为空,可以从AI分析中获取 ); - - if (vectorStored) { - log.info("成功存储题目向量到向量数据库,userId:{} questionId:{}", userId, uuid); - } else { - log.warn("存储题目向量到向量数据库失败,但不影响OCR主流程,userId:{} questionId:{}", userId, uuid); - } + eventPublisher.publishEvent(uploadEvent); + log.info("用户上传题目事件已发布,userId:{}, questionId:{}", userId, uuid); } catch (Exception e) { - // 向量存储失败不应该影响OCR主流程,只记录日志 - log.error("存储题目向量到向量数据库时发生异常,userId:{} questionId:{}", userId, uuid, e); + // 事件发布失败不应该影响OCR主流程,只记录日志 + log.error("发布用户上传题目事件时发生异常,userId:{} questionId:{}", userId, uuid, e); } // TODO 将题干存储到Redis中,通过uuid可以查询,设置24小时过期时间 diff --git a/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningAnalysisService.java b/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningAnalysisService.java index 544db96..b697c3e 100644 --- a/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningAnalysisService.java +++ b/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningAnalysisService.java @@ -7,6 +7,7 @@ import com.achobeta.domain.ai.service.IAiService; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; @@ -20,6 +21,7 @@ public class LearningAnalysisService { @Autowired + @Qualifier("weaviateVectorRepository") private IVectorService vectorService; @Autowired @@ -38,7 +40,15 @@ public void onUserLogin(String userId) { // 分析用户最近7天的学习动态 List dynamics = learningDynamicsService.analyzeUserLearningDynamics(userId); - log.info("用户登录学习动态分析完成,userId:{} 动态数量:{}", userId, dynamics.size()); + if (dynamics != null && !dynamics.isEmpty()) { + log.info("用户登录学习动态分析完成,userId:{} 动态数量:{}", userId, dynamics.size()); + + // 可以在这里将分析结果存储到缓存或数据库中,供前端查询使用 + // 例如:存储到Redis中,key为 "user_dynamics:" + userId + + } else { + log.info("用户登录学习动态分析完成,但未生成有效动态,userId:{}", userId); + } } catch (Exception e) { log.error("用户登录学习动态分析失败,userId:{}", userId, e); diff --git a/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningDynamicsServiceImpl.java b/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningDynamicsServiceImpl.java index c3d9479..44cf5a5 100644 --- a/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningDynamicsServiceImpl.java +++ b/refine-domain/src/main/java/com/achobeta/domain/rag/service/impl/LearningDynamicsServiceImpl.java @@ -15,6 +15,7 @@ import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; @Slf4j @@ -47,15 +48,24 @@ public List analyzeUserLearningDynamics(String userId) { // 3. 使用AI分析学习动态 List dynamics = new ArrayList<>(); + StringBuilder responseBuilder = new StringBuilder(); + // 异步执行AI分析任务,解析题目并生成学习动态 CompletableFuture aiAnalysis = CompletableFuture.runAsync(() -> { aiService.aiChat(analysisPrompt, response -> { try { - // 解析AI返回的JSON格式学习动态 - List parsedDynamics = parseAIResponse(response); - dynamics.addAll(parsedDynamics); + // 收集所有响应片段 + responseBuilder.append(response); + + // 检查是否收到完整的JSON响应 + String currentResponse = responseBuilder.toString(); + if (isCompleteJsonResponse(currentResponse)) { + // 解析完整的AI返回的JSON格式学习动态 + List parsedDynamics = parseAIResponse(currentResponse); + dynamics.addAll(parsedDynamics); + } } catch (Exception e) { - log.error("解析AI分析结果失败", e); + log.error("解析AI分析结果失败:{}", response, e); } }); }); @@ -64,9 +74,17 @@ public List analyzeUserLearningDynamics(String userId) { // 等待AI分析完成,最多等待30秒 try { aiAnalysis.get(30, TimeUnit.SECONDS); + + // 如果AI分析没有产生结果,使用基于规则的分析 + if (dynamics.isEmpty()) { + log.warn("AI分析未产生结果,使用基于规则的分析,userId:{}", userId); + return generateRuleBasedDynamics(userId); + } + } catch (TimeoutException e) { + log.error("AI分析超时,userId:{}", userId, e); + return generateRuleBasedDynamics(userId); } catch (Exception e) { - log.error("AI分析超时或失败", e); - // 如果AI分析失败,返回基于规则的分析结果 + log.error("AI分析失败,userId:{}", userId, e); return generateRuleBasedDynamics(userId); } // 4. 限制返回最多3条动态 @@ -163,17 +181,26 @@ private String buildAnalysisPrompt(String learningDataSummary) { 学习数据: %s - 请按照以下JSON格式返回分析结果,每条动态都要有明确的类型、标题、描述和建议: + **重要要求:请严格按照以下JSON格式返回分析结果,不要添加任何额外的文字说明,只返回纯JSON数组:** [ { - "type": "progress|weakness|achievement", + "type": "progress", "title": "动态标题", "description": "详细描述", "subject": "相关科目", - "priority": 1-5, + "priority": 3, "suggestion": "建议行动", - "relatedQuestionCount": 数量 + "relatedQuestionCount": 5 + }, + { + "type": "weakness", + "title": "另一个动态标题", + "description": "另一个详细描述", + "subject": "另一个科目", + "priority": 4, + "suggestion": "另一个建议行动", + "relatedQuestionCount": 2 } ] @@ -182,6 +209,13 @@ private String buildAnalysisPrompt(String learningDataSummary) { - weakness: 发现薄弱点,如某科目错误较多、学习频率低等 - achievement: 学习成就,如连续学习天数、完成学习目标等 + **注意:** + 1. 必须返回有效的JSON数组格式 + 2. priority必须是1-5之间的整数 + 3. relatedQuestionCount必须是非负整数 + 4. 所有字符串字段不能为null + 5. 不要在JSON前后添加任何解释文字 + 请确保返回的是有效的JSON格式,优先级高的动态排在前面。 """, learningDataSummary); } @@ -191,8 +225,16 @@ private String buildAnalysisPrompt(String learningDataSummary) { */ private List parseAIResponse(String aiResponse) { try { + log.debug("开始解析AI响应,响应长度: {}", aiResponse != null ? aiResponse.length() : 0); + + if (aiResponse == null || aiResponse.trim().isEmpty()) { + log.warn("AI响应为空,返回空列表"); + return Collections.emptyList(); + } + // 提取JSON部分(AI可能返回额外的文本) String jsonPart = extractJsonFromResponse(aiResponse); + log.debug("提取的JSON部分: {}", jsonPart.length() > 500 ? jsonPart.substring(0, 500) + "..." : jsonPart); // 解析JSON List> dynamicMaps = objectMapper.readValue( @@ -201,38 +243,125 @@ private List parseAIResponse(String aiResponse) { List dynamics = new ArrayList<>(); for (Map map : dynamicMaps) { - LearningDynamicVO dynamic = LearningDynamicVO.builder() - .type(map.get("type").toString()) - .title(map.get("title").toString()) - .description(map.get("description").toString()) - .subject(map.get("subject") != null ? map.get("subject").toString() : null) - .priority(Integer.parseInt(map.get("priority").toString())) - .suggestion(map.get("suggestion") != null ? map.get("suggestion").toString() : null) - .relatedQuestionCount(map.get("relatedQuestionCount") != null ? - Integer.parseInt(map.get("relatedQuestionCount").toString()) : 0) - .build(); - dynamics.add(dynamic); + try { + LearningDynamicVO dynamic = LearningDynamicVO.builder() + .type(getStringValue(map, "type")) + .title(getStringValue(map, "title")) + .description(getStringValue(map, "description")) + .subject(getStringValue(map, "subject")) + .priority(getIntValue(map, "priority", 3)) + .suggestion(getStringValue(map, "suggestion")) + .relatedQuestionCount(getIntValue(map, "relatedQuestionCount", 0)) + .build(); + dynamics.add(dynamic); + log.debug("成功解析学习动态: type={}, title={}", dynamic.getType(), dynamic.getTitle()); + } catch (Exception e) { + log.warn("解析单个学习动态失败,跳过该项: {}", map, e); + } } + log.info("成功解析AI响应,获得{}条学习动态", dynamics.size()); return dynamics; } catch (Exception e) { - log.error("解析AI分析结果失败:{}", aiResponse, e); + log.error("解析AI分析结果失败,响应内容: {}", + aiResponse != null && aiResponse.length() > 200 ? + aiResponse.substring(0, 200) + "..." : aiResponse, e); return Collections.emptyList(); } } + + /** + * 安全获取字符串值 + */ + private String getStringValue(Map map, String key) { + Object value = map.get(key); + return value != null ? value.toString() : null; + } + + /** + * 安全获取整数值 + */ + private int getIntValue(Map map, String key, int defaultValue) { + Object value = map.get(key); + if (value == null) { + return defaultValue; + } + try { + if (value instanceof Number) { + return ((Number) value).intValue(); + } + return Integer.parseInt(value.toString()); + } catch (NumberFormatException e) { + log.warn("无法解析整数值: key={}, value={}, 使用默认值: {}", key, value, defaultValue); + return defaultValue; + } + } + + /** + * 检查响应是否包含完整的JSON + */ + private boolean isCompleteJsonResponse(String response) { + if (response == null || response.trim().isEmpty()) { + return false; + } + + String trimmed = response.trim(); + + // 检查是否包含JSON数组的开始和结束标记 + int startIndex = trimmed.indexOf('['); + int endIndex = trimmed.lastIndexOf(']'); + + if (startIndex == -1 || endIndex == -1 || endIndex <= startIndex) { + return false; + } + + // 简单的括号匹配检查 + int openBrackets = 0; + int closeBrackets = 0; + + for (char c : trimmed.toCharArray()) { + if (c == '[') openBrackets++; + if (c == ']') closeBrackets++; + } + + return openBrackets == closeBrackets && openBrackets > 0; + } /** * 从AI响应中提取JSON部分 */ private String extractJsonFromResponse(String response) { + if (response == null || response.trim().isEmpty()) { + throw new IllegalArgumentException("AI响应为空"); + } + + String trimmed = response.trim(); + log.debug("尝试从响应中提取JSON,响应长度: {}", trimmed.length()); + // 查找JSON数组的开始和结束 - int start = response.indexOf('['); - int end = response.lastIndexOf(']'); + int start = trimmed.indexOf('['); + int end = trimmed.lastIndexOf(']'); if (start != -1 && end != -1 && end > start) { - return response.substring(start, end + 1); + String jsonPart = trimmed.substring(start, end + 1); + log.debug("成功提取JSON部分,长度: {}", jsonPart.length()); + return jsonPart; + } + + // 如果没有找到完整的JSON数组,尝试查找JSON对象 + start = trimmed.indexOf('{'); + end = trimmed.lastIndexOf('}'); + + if (start != -1 && end != -1 && end > start) { + // 包装单个对象为数组 + String jsonPart = "[" + trimmed.substring(start, end + 1) + "]"; + log.debug("找到JSON对象,包装为数组,长度: {}", jsonPart.length()); + return jsonPart; } + + log.error("无法从AI响应中提取有效的JSON,响应内容: {}", + trimmed.length() > 200 ? trimmed.substring(0, 200) + "..." : trimmed); throw new IllegalArgumentException("无法从AI响应中提取有效的JSON"); } diff --git a/refine-domain/src/main/java/com/achobeta/domain/user/event/UserBehaviorEvent.java b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserBehaviorEvent.java new file mode 100644 index 0000000..cf39c3e --- /dev/null +++ b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserBehaviorEvent.java @@ -0,0 +1,32 @@ +package com.achobeta.domain.user.event; + +import lombok.Getter; +import org.springframework.context.ApplicationEvent; + +/** + * @Auth : Malog + * @Desc : 用户行为事件基类 + * @Time : 2025/11/28 + */ +@Getter +public abstract class UserBehaviorEvent extends ApplicationEvent { + + private final String userId; + private final String actionType; + private final String questionId; + private final String questionContent; + private final String subject; + private final Integer knowledgePointId; + + public UserBehaviorEvent(Object source, String userId, String actionType, + String questionId, String questionContent, + String subject, Integer knowledgePointId) { + super(source); + this.userId = userId; + this.actionType = actionType; + this.questionId = questionId; + this.questionContent = questionContent; + this.subject = subject; + this.knowledgePointId = knowledgePointId; + } +} \ No newline at end of file diff --git a/refine-domain/src/main/java/com/achobeta/domain/user/event/UserMistakeEvent.java b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserMistakeEvent.java new file mode 100644 index 0000000..c59e118 --- /dev/null +++ b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserMistakeEvent.java @@ -0,0 +1,14 @@ +package com.achobeta.domain.user.event; + +/** + * @Auth : Malog + * @Desc : 用户错题事件 + * @Time : 2025/11/28 + */ +public class UserMistakeEvent extends UserBehaviorEvent { + + public UserMistakeEvent(Object source, String userId, String questionId, + String questionContent, String subject, Integer knowledgePointId) { + super(source, userId, "mistake", questionId, questionContent, subject, knowledgePointId); + } +} \ No newline at end of file diff --git a/refine-domain/src/main/java/com/achobeta/domain/user/event/UserReviewEvent.java b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserReviewEvent.java new file mode 100644 index 0000000..37bd77f --- /dev/null +++ b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserReviewEvent.java @@ -0,0 +1,14 @@ +package com.achobeta.domain.user.event; + +/** + * @Auth : Malog + * @Desc : 用户复习错题事件 + * @Time : 2025/11/28 + */ +public class UserReviewEvent extends UserBehaviorEvent { + + public UserReviewEvent(Object source, String userId, String questionId, + String questionContent, String subject, Integer knowledgePointId) { + super(source, userId, "review", questionId, questionContent, subject, knowledgePointId); + } +} \ No newline at end of file diff --git a/refine-domain/src/main/java/com/achobeta/domain/user/event/UserUploadEvent.java b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserUploadEvent.java new file mode 100644 index 0000000..553b13e --- /dev/null +++ b/refine-domain/src/main/java/com/achobeta/domain/user/event/UserUploadEvent.java @@ -0,0 +1,14 @@ +package com.achobeta.domain.user.event; + +/** + * @Auth : Malog + * @Desc : 用户上传题目事件 + * @Time : 2025/11/28 + */ +public class UserUploadEvent extends UserBehaviorEvent { + + public UserUploadEvent(Object source, String userId, String questionId, + String questionContent, String subject, Integer knowledgePointId) { + super(source, userId, "upload", questionId, questionContent, subject, knowledgePointId); + } +} \ No newline at end of file diff --git a/refine-infrastructure/pom.xml b/refine-infrastructure/pom.xml index 904eecc..60f4b2a 100644 --- a/refine-infrastructure/pom.xml +++ b/refine-infrastructure/pom.xml @@ -11,6 +11,10 @@ refine-infrastructure + + io.weaviate + client + org.apache.pdfbox diff --git a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/LearningDataRepository.java b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/LearningDataRepository.java index 8107137..bbbfb0f 100644 --- a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/LearningDataRepository.java +++ b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/LearningDataRepository.java @@ -3,14 +3,17 @@ import com.achobeta.domain.rag.adapter.port.ILearningDataRepository; import com.achobeta.domain.rag.model.entity.LearningVectorEntity; import com.achobeta.domain.rag.model.valobj.LearningStatisticsVO; -import com.achobeta.infrastructure.dao.vector.IVectorDao; -import com.achobeta.infrastructure.dao.po.LearningVector; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.graphql.model.GraphQLResponse; +import io.weaviate.client.v1.graphql.query.fields.Field; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import org.springframework.util.CollectionUtils; import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; import java.util.*; import java.util.stream.Collectors; @@ -24,7 +27,12 @@ public class LearningDataRepository implements ILearningDataRepository { @Autowired - private IVectorDao vectorDao; + private WeaviateClient weaviateClient; + + @Autowired + private String weaviateClassName; + + private final ObjectMapper objectMapper = new ObjectMapper(); @Override public List getUserRecentLearningData(String userId, int days) { @@ -41,22 +49,16 @@ public List getUserRecentLearningData(String userId, int d return Collections.emptyList(); } - // 从数据库查询用户最近N天的学习数据 - List vectors = vectorDao.getUserRecentLearningData(userId, days); - - if (CollectionUtils.isEmpty(vectors)) { - log.info("用户最近{}天无学习数据,userId:{}", days, userId); - return Collections.emptyList(); - } + // 使用Weaviate查询用户最近N天的学习数据 + List> rawData = queryUserDataWithTimeFilter(userId, days); - // 转换为领域实体 - List result = vectors.stream() - .map(this::convertToEntity) + // 转换为LearningVectorEntity + return rawData.stream() + .map(this::convertToLearningVectorEntity) + .filter(Objects::nonNull) + .sorted((a, b) -> b.getCreatedAt().compareTo(a.getCreatedAt())) // 按时间倒序 .collect(Collectors.toList()); - log.info("成功获取用户学习向量数据,userId:{}, 数据量:{}", userId, result.size()); - return result; - } catch (Exception e) { log.error("获取用户学习向量数据失败,userId:{}, days:{}", userId, days, e); return Collections.emptyList(); @@ -78,11 +80,10 @@ public LearningStatisticsVO getUserLearningStatistics(String userId, int days) { return null; } - // 从数据库查询统计数据 - Map statisticsMap = vectorDao.getUserLearningStatistics(userId, days); + // 使用Weaviate查询用户学习统计数据 + List> rawData = queryUserDataWithTimeFilter(userId, days); - if (statisticsMap == null || statisticsMap.isEmpty()) { - log.info("用户最近{}天无学习统计数据,userId:{}", days, userId); + if (rawData.isEmpty()) { return LearningStatisticsVO.builder() .totalActivities(0) .subjectsCount(0) @@ -91,11 +92,70 @@ public LearningStatisticsVO getUserLearningStatistics(String userId, int days) { .build(); } - // 转换为统计VO - LearningStatisticsVO result = convertToStatisticsVO(statisticsMap); + // 计算统计数据 + int totalActivities = rawData.size(); - log.info("成功获取用户学习统计数据,userId:{}, 总活动次数:{}", userId, result.getTotalActivities()); - return result; + Set subjects = rawData.stream() + .map(data -> (String) data.get("subject")) + .filter(Objects::nonNull) + .filter(s -> !s.trim().isEmpty()) + .collect(Collectors.toSet()); + + Set actionTypes = rawData.stream() + .map(data -> (String) data.get("actionType")) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + + Set activeDates = rawData.stream() + .map(data -> (String) data.get("createdAt")) + .filter(Objects::nonNull) + .map(dateStr -> { + try { + LocalDateTime dateTime = LocalDateTime.parse(dateStr, DateTimeFormatter.ISO_LOCAL_DATE_TIME); + return dateTime.toLocalDate().toString(); + } catch (Exception e) { + return null; + } + }) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + + LocalDateTime firstActivity = rawData.stream() + .map(data -> (String) data.get("createdAt")) + .filter(Objects::nonNull) + .map(dateStr -> { + try { + return LocalDateTime.parse(dateStr, DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } catch (Exception e) { + return null; + } + }) + .filter(Objects::nonNull) + .min(LocalDateTime::compareTo) + .orElse(null); + + LocalDateTime lastActivity = rawData.stream() + .map(data -> (String) data.get("createdAt")) + .filter(Objects::nonNull) + .map(dateStr -> { + try { + return LocalDateTime.parse(dateStr, DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } catch (Exception e) { + return null; + } + }) + .filter(Objects::nonNull) + .max(LocalDateTime::compareTo) + .orElse(null); + + return LearningStatisticsVO.builder() + .totalActivities(totalActivities) + .subjectsCount(subjects.size()) + .activeDays(activeDates.size()) + .actionTypesCount(actionTypes.size()) + .firstActivity(firstActivity) + .lastActivity(lastActivity) + .build(); } catch (Exception e) { log.error("获取用户学习统计数据失败,userId:{}, days:{}", userId, days, e); @@ -118,15 +178,43 @@ public List> getUserLearningDataBySubject(String userId, int return Collections.emptyList(); } - // 从数据库查询按科目分组的学习数据 - List> result = vectorDao.getUserLearningDataBySubject(userId, days); + // 使用Weaviate查询用户学习数据 + List> rawData = queryUserDataWithTimeFilter(userId, days); - if (CollectionUtils.isEmpty(result)) { - log.info("用户最近{}天无按科目分组的学习数据,userId:{}", days, userId); + if (rawData.isEmpty()) { return Collections.emptyList(); } - log.info("成功获取用户按科目分组学习数据,userId:{}, 科目数量:{}", userId, result.size()); + // 按科目分组统计 + Map>> subjectGroups = rawData.stream() + .filter(data -> data.get("subject") != null && !data.get("subject").toString().trim().isEmpty()) + .collect(Collectors.groupingBy(data -> data.get("subject").toString())); + + // 转换为结果格式 + List> result = new ArrayList<>(); + for (Map.Entry>> entry : subjectGroups.entrySet()) { + String subject = entry.getKey(); + List> subjectData = entry.getValue(); + + Set actionTypes = subjectData.stream() + .map(data -> (String) data.get("actionType")) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + + Map subjectStat = new HashMap<>(); + subjectStat.put("subject", subject); + subjectStat.put("activity_count", subjectData.size()); + subjectStat.put("action_list", String.join(",", actionTypes)); + result.add(subjectStat); + } + + // 按活动数量降序排序 + result.sort((a, b) -> { + Integer countA = (Integer) a.get("activity_count"); + Integer countB = (Integer) b.get("activity_count"); + return countB.compareTo(countA); + }); + return result; } catch (Exception e) { @@ -150,15 +238,44 @@ public List> getUserLearningDataByActionType(String userId, return Collections.emptyList(); } - // 从数据库查询按行为类型分组的学习数据 - List> result = vectorDao.getUserLearningDataByActionType(userId, days); + // 使用Weaviate查询用户学习数据 + List> rawData = queryUserDataWithTimeFilter(userId, days); - if (CollectionUtils.isEmpty(result)) { - log.info("用户最近{}天无按行为类型分组的学习数据,userId:{}", days, userId); + if (rawData.isEmpty()) { return Collections.emptyList(); } - log.info("成功获取用户按行为类型分组学习数据,userId:{}, 行为类型数量:{}", userId, result.size()); + // 按行为类型分组统计 + Map>> actionGroups = rawData.stream() + .filter(data -> data.get("actionType") != null) + .collect(Collectors.groupingBy(data -> data.get("actionType").toString())); + + // 转换为结果格式 + List> result = new ArrayList<>(); + for (Map.Entry>> entry : actionGroups.entrySet()) { + String actionType = entry.getKey(); + List> actionData = entry.getValue(); + + Set subjects = actionData.stream() + .map(data -> (String) data.get("subject")) + .filter(Objects::nonNull) + .filter(s -> !s.trim().isEmpty()) + .collect(Collectors.toSet()); + + Map actionStat = new HashMap<>(); + actionStat.put("action_type", actionType); + actionStat.put("activity_count", actionData.size()); + actionStat.put("subjects_count", subjects.size()); + result.add(actionStat); + } + + // 按活动数量降序排序 + result.sort((a, b) -> { + Integer countA = (Integer) a.get("activity_count"); + Integer countB = (Integer) b.get("activity_count"); + return countB.compareTo(countA); + }); + return result; } catch (Exception e) { @@ -168,26 +285,143 @@ public List> getUserLearningDataByActionType(String userId, } /** - * 将数据库PO转换为领域实体 + * 查询用户指定时间范围内的学习数据 */ - private LearningVectorEntity convertToEntity(LearningVector vector) { - if (vector == null) { + private List> queryUserDataWithTimeFilter(String userId, int days) { + try { + // 计算时间范围 + LocalDateTime endTime = LocalDateTime.now(); + LocalDateTime startTime = endTime.minusDays(days); + + Field[] fields = { + Field.builder().name("userId").build(), + Field.builder().name("questionId").build(), + Field.builder().name("questionContent").build(), + Field.builder().name("actionType").build(), + Field.builder().name("subject").build(), + Field.builder().name("knowledgePointId").build(), + Field.builder().name("createdAt").build() + }; + + io.weaviate.client.v1.filters.WhereFilter whereFilter = io.weaviate.client.v1.filters.WhereFilter.builder() + .operator(io.weaviate.client.v1.filters.Operator.And) + .operands(new io.weaviate.client.v1.filters.WhereFilter[]{ + io.weaviate.client.v1.filters.WhereFilter.builder() + .path(new String[]{"userId"}) + .operator(io.weaviate.client.v1.filters.Operator.Equal) + .valueText(userId) + .build(), + io.weaviate.client.v1.filters.WhereFilter.builder() + .path(new String[]{"createdAt"}) + .operator(io.weaviate.client.v1.filters.Operator.GreaterThanEqual) + .valueText(startTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)) + .build() + }) + .build(); + + Result result = weaviateClient.graphQL().get() + .withClassName(weaviateClassName) + .withFields(fields) + .withWhere(whereFilter) + .withLimit(1000) + .run(); + + if (result.hasErrors()) { + log.error("查询用户学习数据失败: {}", result.getError().getMessages()); + return Collections.emptyList(); + } + + return parseQueryResults(result.getResult()); + + } catch (Exception e) { + log.error("查询用户学习数据失败,userId:{} days:{}", userId, days, e); + return Collections.emptyList(); + } + } + + /** + * 解析Weaviate查询结果 + */ + @SuppressWarnings("unchecked") + private List> parseQueryResults(GraphQLResponse response) { + try { + if (response == null || response.getData() == null) { + return Collections.emptyList(); + } + + // 安全地转换getData()的结果 + Object dataObj = response.getData(); + if (!(dataObj instanceof Map)) { + log.warn("Weaviate查询结果数据格式不正确,期望Map类型,实际类型: {}", dataObj.getClass().getSimpleName()); + return Collections.emptyList(); + } + + Map data = (Map) dataObj; + Map get = (Map) data.get("Get"); + if (get == null) { + return Collections.emptyList(); + } + + List> objects = (List>) get.get(weaviateClassName); + if (objects == null) { + return Collections.emptyList(); + } + + return objects; + + } catch (Exception e) { + log.error("解析Weaviate查询结果失败", e); + return Collections.emptyList(); + } + } + + /** + * 将Weaviate查询结果转换为LearningVectorEntity + */ + private LearningVectorEntity convertToLearningVectorEntity(Map data) { + try { + if (data == null) { + return null; + } + + LocalDateTime createdAt = null; + String createdAtStr = (String) data.get("createdAt"); + if (createdAtStr != null) { + try { + createdAt = LocalDateTime.parse(createdAtStr, DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } catch (Exception e) { + log.warn("解析创建时间失败: {}", createdAtStr); + } + } + + Integer knowledgePointId = null; + Object kpId = data.get("knowledgePointId"); + if (kpId != null) { + if (kpId instanceof Number) { + knowledgePointId = ((Number) kpId).intValue(); + } else { + try { + knowledgePointId = Integer.parseInt(kpId.toString()); + } catch (NumberFormatException e) { + log.warn("解析知识点ID失败: {}", kpId); + } + } + } + + return LearningVectorEntity.builder() + .userId((String) data.get("userId")) + .questionId((String) data.get("questionId")) + .questionContent((String) data.get("questionContent")) + .actionType((String) data.get("actionType")) + .subject((String) data.get("subject")) + .knowledgePointId(knowledgePointId) + .createdAt(createdAt) + .build(); + + } catch (Exception e) { + log.error("转换LearningVectorEntity失败", e); return null; } - - return LearningVectorEntity.builder() - .id(vector.getId()) - .userId(vector.getUserId()) - .questionId(vector.getQuestionId()) - .actionType(vector.getActionType()) - .questionContent(vector.getQuestionContent()) - .subject(vector.getSubject()) - .knowledgePointId(vector.getKnowledgePointId()) - .embedding(vector.getEmbedding()) - .metadata(vector.getMetadata()) - .createdAt(vector.getCreatedAt()) - .updatedAt(vector.getUpdatedAt()) - .build(); } /** diff --git a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/VectorRepository.java b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/VectorRepository.java deleted file mode 100644 index 4ea2adc..0000000 --- a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/VectorRepository.java +++ /dev/null @@ -1,421 +0,0 @@ -package com.achobeta.infrastructure.adapter.repository; - -import com.achobeta.domain.rag.model.valobj.LearningInsightVO; -import com.achobeta.domain.rag.model.valobj.SimilarQuestionVO; -import com.achobeta.domain.rag.service.IVectorService; -import com.achobeta.infrastructure.dao.vector.IVectorDao; -import com.achobeta.infrastructure.dao.po.LearningVector; -import com.achobeta.infrastructure.gateway.DashScopeEmbeddingService; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Repository; - -import java.util.*; -import java.util.stream.Collectors; - -import static com.achobeta.types.enums.ActionType.MISTAKE; - -/** - * 向量数据库适配器实现 - */ -@Slf4j -@Repository -public class VectorRepository implements IVectorService { - - @Autowired - private IVectorDao vectorDao; - - @Autowired - private DashScopeEmbeddingService embeddingService; // 使用DashScope嵌入服务 - - public VectorRepository(DashScopeEmbeddingService embeddingService) { - this.embeddingService = embeddingService; - } - - /** - * 存储学习向量信息 - * - * @param userId 用户ID - * @param questionId 题目ID - * @param questionContent 题目内容 - * @param actionType 操作类型 - * @param subject 科目 - * @param knowledgePointId 知识点ID - * @return 存储成功返回true,失败返回false - */ - @Override - public boolean storeLearningVector(String userId, String questionId, String questionContent, - String actionType, String subject, Integer knowledgePointId) { - try { - // 构建向量化文本 - String vectorText = buildVectorText(actionType, questionContent, subject); - - // 生成向量 - float[] embedding = embeddingService.generateEmbedding(vectorText); - - // 构建数据库实体 - LearningVector vector = LearningVector.builder() - .userId(userId) - .questionId(questionId) - .actionType(actionType) - .questionContent(questionContent) - .subject(subject) - .knowledgePointId(knowledgePointId) - .embedding(embedding) - .metadata(buildMetadata(actionType, subject, knowledgePointId)) - .build(); - - int result = vectorDao.insertLearningVector(vector); - - log.info("成功存储学习向量,userId:{} questionId:{} actionType:{}", - userId, questionId, actionType); - return result > 0; - - } catch (Exception e) { - log.error("存储学习向量失败,userId:{} questionId:{}", userId, questionId, e); - return false; - } - } - - - /** - * 搜索与给定文本相似的题目。 - * - * @param userId 用户ID,用于限定搜索范围 - * @param queryText 查询文本,将被转换为向量进行相似度匹配 - * @param limit 返回结果的最大数量限制 - * @return 相似的题目列表,封装为 SimilarQuestionVO 对象;若发生异常则返回空列表 - */ - @Override - public List searchSimilarQuestions(String userId, String queryText, int limit) { - try { - // 生成查询向量 - float[] queryEmbedding = embeddingService.generateEmbedding(queryText); - - // 执行向量相似度搜索 - List similarVectors = vectorDao.searchSimilarVectors( - userId, queryEmbedding, limit); - - // 转换为值对象 - return similarVectors.stream() - .map(this::convertToSimilarQuestionVO) - .collect(Collectors.toList()); - - } catch (Exception e) { - log.error("搜索相似题目失败,userId:{} queryText:{}", userId, queryText, e); - return Collections.emptyList(); - } - } - - /** - * 获取用户的薄弱知识点分析结果。 - * - * @param userId 用户ID - * @return 用户的薄弱点信息列表,封装为 LearningInsightVO 对象;若发生异常则返回空列表 - */ - @Override - public List getUserWeaknesses(String userId) { - try { - // 分析用户错题模式 - List userVectors = vectorDao.getUserVectorsByActionType(userId, MISTAKE.getActionType()); - - // 使用聚类分析找出薄弱点 - return analyzeWeaknesses(userVectors); - - } catch (Exception e) { - log.error("获取用户薄弱点失败,userId:{}", userId, e); - return Collections.emptyList(); - } - } - - /** - * 根据用户的学习行为数据生成个性化推荐内容。 - * - * @param userId 用户ID - * @return 推荐内容列表,封装为 LearningInsightVO 对象;若发生异常则返回空列表 - */ - @Override - public List getUserRecommendations(String userId) { - try { - // 获取用户所有学习向量 - List allVectors = vectorDao.getUserAllVectors(userId); - - // 生成学习推荐 - return generateRecommendations(allVectors); - - } catch (Exception e) { - log.error("获取用户学习推荐失败,userId:{}", userId, e); - return Collections.emptyList(); - } - } - - /** - * 分析指定用户的学习行为模式,并将分析结果保存到数据库中。 - * - * @param userId 用户ID - */ - @Override - public void analyzeUserLearningPatterns(String userId) { - try { - log.info("开始分析用户学习模式,userId:{}", userId); - - // 获取用户所有学习向量 - List allVectors = vectorDao.getUserAllVectors(userId); - - if (allVectors.isEmpty()) { - log.info("用户暂无学习数据,userId:{}", userId); - return; - } - - // 分析学习模式并生成洞察 - List insights = generateLearningInsights(allVectors); - - // 保存分析结果 - for (LearningInsightVO insight : insights) { - vectorDao.saveLearningInsight(userId, insight); - } - log.info("用户学习模式分析完成,userId:{} 生成洞察数量:{}", userId, insights.size()); - - } catch (Exception e) { - log.error("分析用户学习模式失败,userId:{}", userId, e); - } - } - - - /** - * 构建向量化文本 - */ - private String buildVectorText(String actionType, String questionContent, String subject) { - StringBuilder textBuilder = new StringBuilder(); - - // 添加行为描述 - textBuilder.append("用户").append(getActionDescription(actionType)); - - // 添加科目信息 - if (subject != null && !subject.trim().isEmpty()) { - textBuilder.append("了一道").append(subject).append("题目:"); - } else { - textBuilder.append("了一道题目:"); - } - - // 添加题目内容 - if (questionContent != null && !questionContent.trim().isEmpty()) { - // 限制题目内容长度,避免向量化文本过长 - String content = questionContent.trim(); - if (content.length() > 500) { - content = content.substring(0, 500) + "..."; - } - textBuilder.append(content); - } - - return textBuilder.toString(); - } - - /** - * 获取行为类型描述 - */ - private String getActionDescription(String actionType) { - switch (actionType) { - case "upload": - return "上传"; - case "review": - return "复习"; - case "qa": - return "问答"; - case "mistake": - return "做错"; - default: - return "学习"; - } - } - - /** - * 构建元数据 - */ - private Map buildMetadata(String actionType, String subject, Integer knowledgePointId) { - Map metadata = new HashMap<>(); - metadata.put("actionType", actionType); - if (subject != null) metadata.put("subject", subject); - if (knowledgePointId != null) metadata.put("knowledgePointId", knowledgePointId); - metadata.put("timestamp", System.currentTimeMillis()); - metadata.put("embeddingModel", "dashscope-text-embedding-v1"); - return metadata; - } - - /** - * 转换为相似题目值对象 - */ - private SimilarQuestionVO convertToSimilarQuestionVO(LearningVector vector) { - return SimilarQuestionVO.builder() - .questionId(vector.getQuestionId()) - .questionContent(vector.getQuestionContent()) - .actionType(vector.getActionType()) - .subject(vector.getSubject()) - .similarity(vector.getSimilarity()) - .createdAt(vector.getCreatedAt() != null ? vector.getCreatedAt().toString() : null) - .build(); - } - - /** - * 分析薄弱点 - */ - private List analyzeWeaknesses(List vectors) { - List insights = new ArrayList<>(); - - if (vectors.isEmpty()) { - return insights; - } - - // 按科目分组分析 - Map> subjectGroups = vectors.stream() - .filter(v -> v.getSubject() != null && !v.getSubject().trim().isEmpty()) - .collect(Collectors.groupingBy(LearningVector::getSubject)); - - for (Map.Entry> entry : subjectGroups.entrySet()) { - String subject = entry.getKey(); - List subjectVectors = entry.getValue(); - - if (subjectVectors.size() >= 3) { // 至少3道错题才认为是薄弱点 - insights.add(LearningInsightVO.builder() - .type("weakness") - .title(subject + "学科薄弱") - .description(String.format("在%s学科中错误%d道题目,建议加强练习", - subject, subjectVectors.size())) - .confidenceScore(Math.min(0.9, 0.5 + subjectVectors.size() * 0.1)) - .relatedQuestions(subjectVectors.stream() - .map(LearningVector::getQuestionId) - .collect(Collectors.toList())) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - } - } - - // 分析总体错题频率 - if (vectors.size() > 10) { - insights.add(LearningInsightVO.builder() - .type("weakness") - .title("错题频率较高") - .description(String.format("总共有%d道错题,建议系统性复习基础知识", vectors.size())) - .confidenceScore(0.8) - .relatedQuestions(vectors.stream() - .limit(10) - .map(LearningVector::getQuestionId) - .collect(Collectors.toList())) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - } - - return insights; - } - - /** - * 生成学习推荐 - */ - private List generateRecommendations(List vectors) { - List recommendations = new ArrayList<>(); - - if (vectors.isEmpty()) { - recommendations.add(LearningInsightVO.builder() - .type("recommendation") - .title("开始学习之旅") - .description("还没有学习记录,建议开始上传题目进行学习") - .confidenceScore(1.0) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - return recommendations; - } - - // 分析学习频率 - long recentWeekCount = vectors.stream() - .filter(v -> v.getCreatedAt() != null && - v.getCreatedAt().isAfter(java.time.LocalDateTime.now().minusWeeks(1))) - .count(); - - if (recentWeekCount > 10) { - recommendations.add(LearningInsightVO.builder() - .type("recommendation") - .title("保持学习节奏") - .description("近一周学习活跃,建议继续保持良好的学习习惯") - .confidenceScore(0.9) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - } else if (recentWeekCount < 3) { - recommendations.add(LearningInsightVO.builder() - .type("recommendation") - .title("增加学习频率") - .description("建议增加学习时间,每天至少学习30分钟") - .confidenceScore(0.8) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - } - - // 分析科目分布 - Map subjectCounts = vectors.stream() - .filter(v -> v.getSubject() != null && !v.getSubject().trim().isEmpty()) - .collect(Collectors.groupingBy(LearningVector::getSubject, Collectors.counting())); - - if (subjectCounts.size() == 1) { - String subject = subjectCounts.keySet().iterator().next(); - recommendations.add(LearningInsightVO.builder() - .type("recommendation") - .title("扩展学习科目") - .description(String.format("目前主要学习%s,建议尝试其他科目的学习", subject)) - .confidenceScore(0.7) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - } - - return recommendations; - } - - /** - * 生成学习洞察 - */ - private List generateLearningInsights(List vectors) { - List insights = new ArrayList<>(); - - // 分析学习频率 - long recentWeekCount = vectors.stream() - .filter(v -> v.getCreatedAt() != null && - v.getCreatedAt().isAfter(java.time.LocalDateTime.now().minusWeeks(1))) - .count(); - - if (recentWeekCount > 15) { - insights.add(LearningInsightVO.builder() - .type("strength") - .title("学习积极性高") - .description("近一周学习非常活跃,保持良好的学习习惯") - .confidenceScore(0.9) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - } - - // 分析学习连续性 - long activeDays = vectors.stream() - .filter(v -> v.getCreatedAt() != null && - v.getCreatedAt().isAfter(java.time.LocalDateTime.now().minusWeeks(1))) - .map(v -> v.getCreatedAt().toLocalDate()) - .distinct() - .count(); - - if (activeDays >= 5) { - insights.add(LearningInsightVO.builder() - .type("achievement") - .title("学习坚持性优秀") - .description(String.format("近一周有%d天进行了学习,坚持性很好", activeDays)) - .confidenceScore(0.8) - .createdAt(java.time.LocalDateTime.now()) - .isActive(true) - .build()); - } - - return insights; - } -} \ No newline at end of file diff --git a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/WeaviateSchemaInitializer.java b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/WeaviateSchemaInitializer.java new file mode 100644 index 0000000..18c2050 --- /dev/null +++ b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/WeaviateSchemaInitializer.java @@ -0,0 +1,156 @@ +package com.achobeta.infrastructure.adapter.repository; + +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.schema.model.DataType; +import io.weaviate.client.v1.schema.model.Property; +import io.weaviate.client.v1.schema.model.WeaviateClass; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.CommandLineRunner; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.List; + +/** + * @Auth : Malog + * @Desc : Weaviate Schema初始化器 + * @Time : 2025/12/01 + */ +@Slf4j +@Component +public class WeaviateSchemaInitializer implements CommandLineRunner { + + @Autowired + private WeaviateClient weaviateClient; + + @Value("${weaviate.class-name:LearningVector}") + private String className; + + @Override + public void run(String... args) throws Exception { + try { + log.info("开始初始化Weaviate Schema,className: {}", className); + + // 检查Schema是否已存在 + if (isSchemaExists()) { + log.info("Weaviate Schema已存在,跳过初始化,className: {}", className); + return; + } + + // 创建Schema + createSchema(); + log.info("Weaviate Schema初始化完成,className: {}", className); + + } catch (Exception e) { + log.error("Weaviate Schema初始化失败", e); + // 不抛出异常,避免影响应用启动 + } + } + + /** + * 检查Schema是否存在 + */ + private boolean isSchemaExists() { + try { + Result result = weaviateClient.schema().classGetter() + .withClassName(className) + .run(); + + return !result.hasErrors() && result.getResult() != null; + } catch (Exception e) { + log.warn("检查Schema是否存在时发生异常: {}", e.getMessage()); + return false; + } + } + + /** + * 创建Schema + */ + private void createSchema() { + try { + // 定义属性 + List properties = new ArrayList<>(); + + // 用户ID + properties.add(Property.builder() + .name("userId") + .dataType(List.of(DataType.TEXT)) + .description("用户ID") + .build()); + + // 题目ID + properties.add(Property.builder() + .name("questionId") + .dataType(List.of(DataType.TEXT)) + .description("题目ID") + .build()); + + // 题目内容 + properties.add(Property.builder() + .name("questionContent") + .dataType(List.of(DataType.TEXT)) + .description("题目内容") + .build()); + + // 行为类型 + properties.add(Property.builder() + .name("actionType") + .dataType(List.of(DataType.TEXT)) + .description("行为类型") + .build()); + + // 科目 + properties.add(Property.builder() + .name("subject") + .dataType(List.of(DataType.TEXT)) + .description("科目") + .build()); + + // 知识点ID + properties.add(Property.builder() + .name("knowledgePointId") + .dataType(List.of(DataType.INT)) + .description("知识点ID") + .build()); + + // 创建时间 + properties.add(Property.builder() + .name("createdAt") + .dataType(List.of(DataType.TEXT)) + .description("创建时间") + .build()); + + // 更新时间 + properties.add(Property.builder() + .name("updatedAt") + .dataType(List.of(DataType.TEXT)) + .description("更新时间") + .build()); + + // 创建类定义 + WeaviateClass weaviateClass = WeaviateClass.builder() + .className(className) + .description("学习行为向量数据") + .properties(properties) + .vectorizer("text2vec-cohere") // 使用Cohere向量化器,适合云实例 + .build(); + + // 创建Schema + Result result = weaviateClient.schema().classCreator() + .withClass(weaviateClass) + .run(); + + if (result.hasErrors()) { + log.error("创建Weaviate Schema失败: {}", result.getError().getMessages()); + } else { + log.info("成功创建Weaviate Schema,className: {}", className); + } + + } catch (Exception e) { + log.error("创建Weaviate Schema时发生异常", e); + } + } +} \ No newline at end of file diff --git a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/WeaviateVectorRepository.java b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/WeaviateVectorRepository.java new file mode 100644 index 0000000..95c9a67 --- /dev/null +++ b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/adapter/repository/WeaviateVectorRepository.java @@ -0,0 +1,573 @@ +package com.achobeta.infrastructure.adapter.repository; + +import com.achobeta.domain.rag.model.valobj.LearningInsightVO; +import com.achobeta.domain.rag.model.valobj.SimilarQuestionVO; +import com.achobeta.domain.rag.service.IVectorService; +import com.achobeta.infrastructure.gateway.DashScopeEmbeddingService; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.data.model.WeaviateObject; +import io.weaviate.client.v1.graphql.model.GraphQLResponse; +import io.weaviate.client.v1.graphql.query.argument.NearVectorArgument; +import io.weaviate.client.v1.graphql.query.fields.Field; +import io.weaviate.client.v1.schema.model.DataType; +import io.weaviate.client.v1.schema.model.Property; +import io.weaviate.client.v1.schema.model.WeaviateClass; +import jakarta.annotation.PostConstruct; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.stereotype.Repository; + +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.stream.Collectors; + +/** + * @Auth : Malog + * @Desc : Weaviate向量数据库适配器实现 + * @Time : 2025/11/28 + */ +@Slf4j +@Repository +@Qualifier("weaviateVectorRepository") +public class WeaviateVectorRepository implements IVectorService { + + @Autowired + private WeaviateClient weaviateClient; + + @Autowired + private String weaviateClassName; + + @Autowired + private DashScopeEmbeddingService embeddingService; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** + * 初始化Weaviate Schema + */ + @PostConstruct + public void initializeSchema() { + try { + // 检查类是否已存在 + Result existsResult = weaviateClient.schema().classGetter().withClassName(weaviateClassName).run(); + + if (existsResult.hasErrors()) { + // 如果获取失败,可能是类不存在,尝试创建 + log.info("Weaviate类 {} 不存在,开始创建", weaviateClassName); + createWeaviateClass(); + log.info("Weaviate类 {} 创建成功", weaviateClassName); + } else { + log.info("Weaviate类 {} 已存在", weaviateClassName); + } + } catch (Exception e) { + log.error("初始化Weaviate Schema失败", e); + } + } + + /** + * 创建Weaviate类定义 + */ + private void createWeaviateClass() { + WeaviateClass weaviateClass = WeaviateClass.builder() + .className(weaviateClassName) + .description("学习向量数据存储") + .vectorizer("none") // 使用自定义向量 + .properties(Arrays.asList( + Property.builder() + .name("userId") + .dataType(Arrays.asList(DataType.TEXT)) + .description("用户ID") + .build(), + Property.builder() + .name("questionId") + .dataType(Arrays.asList(DataType.TEXT)) + .description("题目ID") + .build(), + Property.builder() + .name("actionType") + .dataType(Arrays.asList(DataType.TEXT)) + .description("操作类型") + .build(), + Property.builder() + .name("questionContent") + .dataType(Arrays.asList(DataType.TEXT)) + .description("题目内容") + .build(), + Property.builder() + .name("subject") + .dataType(Arrays.asList(DataType.TEXT)) + .description("科目") + .build(), + Property.builder() + .name("knowledgePointId") + .dataType(Arrays.asList(DataType.INT)) + .description("知识点ID") + .build(), + Property.builder() + .name("metadata") + .dataType(Arrays.asList(DataType.TEXT)) + .description("元数据JSON") + .build(), + Property.builder() + .name("createdAt") + .dataType(Arrays.asList(DataType.TEXT)) + .description("创建时间") + .build() + )) + .build(); + + Result result = weaviateClient.schema().classCreator().withClass(weaviateClass).run(); + if (result.hasErrors()) { + throw new RuntimeException("创建Weaviate类失败: " + result.getError().getMessages()); + } + } + + @Override + public boolean storeLearningVector(String userId, String questionId, String questionContent, + String actionType, String subject, Integer knowledgePointId) { + try { + // 构建向量化文本 + String vectorText = buildVectorText(actionType, questionContent, subject); + + // 生成向量 + float[] embedding = embeddingService.generateEmbedding(vectorText); + + // 转换为Float[]数组 + Float[] embeddingArray = new Float[embedding.length]; + for (int i = 0; i < embedding.length; i++) { + embeddingArray[i] = embedding[i]; + } + + // 构建元数据 + Map metadata = buildMetadata(actionType, subject, knowledgePointId); + String metadataJson = objectMapper.writeValueAsString(metadata); + + // 构建Weaviate对象 + Map properties = new HashMap<>(); + properties.put("userId", userId); + properties.put("questionId", questionId); + properties.put("actionType", actionType); + properties.put("questionContent", questionContent); + properties.put("subject", subject); + properties.put("knowledgePointId", knowledgePointId); + properties.put("metadata", metadataJson); + properties.put("createdAt", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + + // 存储到Weaviate + Result result = weaviateClient.data().creator() + .withClassName(weaviateClassName) + .withProperties(properties) + .withVector(embeddingArray) + .run(); + + if (result.hasErrors()) { + log.error("=== Weaviate存储错误 ==="); + log.error("错误信息: {}", result.getError().getMessages()); + log.error("用户ID: {}", userId); + log.error("题目ID: {}", questionId); + log.error("======================"); + return false; + } + + log.info("=== 学习向量存储成功 ==="); + log.info("用户ID: {}", userId); + log.info("题目ID: {}", questionId); + log.info("行为类型: {}", actionType); + log.info("科目: {}", subject != null ? subject : "未知"); + log.info("题目内容: {}", questionContent != null ? + (questionContent.length() > 100 ? questionContent.substring(0, 100) + "..." : questionContent) : "无"); + log.info("向量维度: {}", embeddingArray.length); + log.info("存储时间: {}", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + log.info("========================"); + return true; + + } catch (Exception e) { + log.error("=== 学习向量存储失败 ==="); + log.error("用户ID: {}", userId); + log.error("题目ID: {}", questionId); + log.error("行为类型: {}", actionType); + log.error("错误信息: {}", e.getMessage()); + log.error("========================"); + return false; + } + } + + @Override + public List searchSimilarQuestions(String userId, String queryText, int limit) { + try { + // 生成查询向量 + float[] queryEmbedding = embeddingService.generateEmbedding(queryText); + + // 转换为Float[]数组 + Float[] queryEmbeddingArray = new Float[queryEmbedding.length]; + for (int i = 0; i < queryEmbedding.length; i++) { + queryEmbeddingArray[i] = queryEmbedding[i]; + } + + // 构建GraphQL查询 + Field[] fields = { + Field.builder().name("userId").build(), + Field.builder().name("questionId").build(), + Field.builder().name("questionContent").build(), + Field.builder().name("actionType").build(), + Field.builder().name("subject").build(), + Field.builder().name("createdAt").build(), + Field.builder().name("_additional").fields( + Field.builder().name("distance").build() + ).build() + }; + + NearVectorArgument nearVector = NearVectorArgument.builder() + .vector(queryEmbeddingArray) + .build(); + + io.weaviate.client.v1.filters.WhereFilter whereFilter = io.weaviate.client.v1.filters.WhereFilter.builder() + .path(new String[]{"userId"}) + .operator(io.weaviate.client.v1.filters.Operator.Equal) + .valueText(userId) + .build(); + + Result result = weaviateClient.graphQL().get() + .withClassName(weaviateClassName) + .withFields(fields) + .withNearVector(nearVector) + .withWhere(whereFilter) + .withLimit(limit) + .run(); + + if (result.hasErrors()) { + log.error("Weaviate相似度搜索失败: {}", result.getError().getMessages()); + return Collections.emptyList(); + } + + return parseSearchResults(result.getResult()); + + } catch (Exception e) { + log.error("搜索相似题目失败,userId:{} queryText:{}", userId, queryText, e); + return Collections.emptyList(); + } + } + + @Override + public List getUserWeaknesses(String userId) { + try { + // 查询用户的错题数据 + List> mistakeData = queryUserDataByActionType(userId, "mistake"); + + // 分析薄弱点 + return analyzeWeaknesses(mistakeData); + + } catch (Exception e) { + log.error("获取用户薄弱点失败,userId:{}", userId, e); + return Collections.emptyList(); + } + } + + @Override + public List getUserRecommendations(String userId) { + try { + // 获取用户所有学习数据 + List> allData = queryAllUserData(userId); + + // 生成学习推荐 + return generateRecommendations(allData); + + } catch (Exception e) { + log.error("获取用户学习推荐失败,userId:{}", userId, e); + return Collections.emptyList(); + } + } + + @Override + public void analyzeUserLearningPatterns(String userId) { + try { + log.info("开始分析用户学习模式,userId:{}", userId); + + // 获取用户所有学习数据 + List> allData = queryAllUserData(userId); + + if (allData.isEmpty()) { + log.info("用户暂无学习数据,userId:{}", userId); + return; + } + + // 分析学习模式并生成洞察 + List insights = generateLearningInsights(allData); + + // 这里可以将洞察结果存储到其他地方,比如MySQL或Redis + log.info("用户学习模式分析完成,userId:{} 生成洞察数量:{}", userId, insights.size()); + + } catch (Exception e) { + log.error("分析用户学习模式失败,userId:{}", userId, e); + } + } + + /** + * 查询用户特定行为类型的数据 + */ + private List> queryUserDataByActionType(String userId, String actionType) { + try { + Field[] fields = { + Field.builder().name("userId").build(), + Field.builder().name("questionId").build(), + Field.builder().name("questionContent").build(), + Field.builder().name("actionType").build(), + Field.builder().name("subject").build(), + Field.builder().name("knowledgePointId").build(), + Field.builder().name("createdAt").build() + }; + + io.weaviate.client.v1.filters.WhereFilter whereFilter = io.weaviate.client.v1.filters.WhereFilter.builder() + .operator(io.weaviate.client.v1.filters.Operator.And) + .operands(new io.weaviate.client.v1.filters.WhereFilter[]{ + io.weaviate.client.v1.filters.WhereFilter.builder() + .path(new String[]{"userId"}) + .operator(io.weaviate.client.v1.filters.Operator.Equal) + .valueText(userId) + .build(), + io.weaviate.client.v1.filters.WhereFilter.builder() + .path(new String[]{"actionType"}) + .operator(io.weaviate.client.v1.filters.Operator.Equal) + .valueText(actionType) + .build() + }) + .build(); + + Result result = weaviateClient.graphQL().get() + .withClassName(weaviateClassName) + .withFields(fields) + .withWhere(whereFilter) + .withLimit(1000) + .run(); + + if (result.hasErrors()) { + log.error("查询用户数据失败: {}", result.getError().getMessages()); + return Collections.emptyList(); + } + + return parseQueryResults(result.getResult()); + + } catch (Exception e) { + log.error("查询用户数据失败,userId:{} actionType:{}", userId, actionType, e); + return Collections.emptyList(); + } + } + + /** + * 查询用户所有数据 + */ + private List> queryAllUserData(String userId) { + try { + Field[] fields = { + Field.builder().name("userId").build(), + Field.builder().name("questionId").build(), + Field.builder().name("questionContent").build(), + Field.builder().name("actionType").build(), + Field.builder().name("subject").build(), + Field.builder().name("knowledgePointId").build(), + Field.builder().name("createdAt").build() + }; + + io.weaviate.client.v1.filters.WhereFilter whereFilter = io.weaviate.client.v1.filters.WhereFilter.builder() + .path(new String[]{"userId"}) + .operator(io.weaviate.client.v1.filters.Operator.Equal) + .valueText(userId) + .build(); + + Result result = weaviateClient.graphQL().get() + .withClassName(weaviateClassName) + .withFields(fields) + .withWhere(whereFilter) + .withLimit(1000) + .run(); + + if (result.hasErrors()) { + log.error("查询用户所有数据失败: {}", result.getError().getMessages()); + return Collections.emptyList(); + } + + return parseQueryResults(result.getResult()); + + } catch (Exception e) { + log.error("查询用户所有数据失败,userId:{}", userId, e); + return Collections.emptyList(); + } + } + + /** + * 解析搜索结果 + */ + @SuppressWarnings("unchecked") + private List parseSearchResults(GraphQLResponse response) { + List results = new ArrayList<>(); + + try { + Object dataObj = response.getData(); + if (!(dataObj instanceof Map)) { + log.warn("GraphQL响应数据格式不正确"); + return results; + } + Map data = (Map) dataObj; + if (data != null && data.containsKey("Get")) { + Map get = (Map) data.get("Get"); + if (get.containsKey(weaviateClassName)) { + List> objects = (List>) get.get(weaviateClassName); + + for (Map obj : objects) { + SimilarQuestionVO vo = SimilarQuestionVO.builder() + .questionId((String) obj.get("questionId")) + .questionContent((String) obj.get("questionContent")) + .actionType((String) obj.get("actionType")) + .subject((String) obj.get("subject")) + .createdAt((String) obj.get("createdAt")) + .build(); + + // 获取相似度分数 + if (obj.containsKey("_additional")) { + Map additional = (Map) obj.get("_additional"); + if (additional.containsKey("distance")) { + Double distance = (Double) additional.get("distance"); + // 将距离转换为相似度 (1 - distance) + vo.setSimilarity(1.0 - distance); + } + } + + results.add(vo); + } + } + } + } catch (Exception e) { + log.error("解析搜索结果失败", e); + } + + return results; + } + + /** + * 解析查询结果 + */ + @SuppressWarnings("unchecked") + private List> parseQueryResults(GraphQLResponse response) { + List> results = new ArrayList<>(); + + try { + Object dataObj = response.getData(); + if (!(dataObj instanceof Map)) { + log.warn("GraphQL响应数据格式不正确"); + return results; + } + Map data = (Map) dataObj; + if (data != null && data.containsKey("Get")) { + Map get = (Map) data.get("Get"); + if (get.containsKey(weaviateClassName)) { + List> objects = (List>) get.get(weaviateClassName); + results.addAll(objects); + } + } + } catch (Exception e) { + log.error("解析查询结果失败", e); + } + + return results; + } + + // 以下方法与原VectorRepository中的实现类似,但使用Map而不是LearningVector + + private String buildVectorText(String actionType, String questionContent, String subject) { + StringBuilder textBuilder = new StringBuilder(); + textBuilder.append("用户").append(getActionDescription(actionType)); + if (subject != null && !subject.trim().isEmpty()) { + textBuilder.append("了一道").append(subject).append("题目:"); + } else { + textBuilder.append("了一道题目:"); + } + if (questionContent != null && !questionContent.trim().isEmpty()) { + String content = questionContent.trim(); + if (content.length() > 500) { + content = content.substring(0, 500) + "..."; + } + textBuilder.append(content); + } + return textBuilder.toString(); + } + + private String getActionDescription(String actionType) { + switch (actionType) { + case "upload": return "上传"; + case "review": return "复习"; + case "qa": return "问答"; + case "mistake": return "做错"; + default: return "学习"; + } + } + + private Map buildMetadata(String actionType, String subject, Integer knowledgePointId) { + Map metadata = new HashMap<>(); + metadata.put("actionType", actionType); + if (subject != null) metadata.put("subject", subject); + if (knowledgePointId != null) metadata.put("knowledgePointId", knowledgePointId); + metadata.put("timestamp", System.currentTimeMillis()); + metadata.put("embeddingModel", "dashscope-text-embedding-v1"); + return metadata; + } + + private List analyzeWeaknesses(List> vectors) { + List insights = new ArrayList<>(); + if (vectors.isEmpty()) return insights; + + // 按科目分组分析 + Map>> subjectGroups = vectors.stream() + .filter(v -> v.get("subject") != null && !v.get("subject").toString().trim().isEmpty()) + .collect(Collectors.groupingBy(v -> v.get("subject").toString())); + + for (Map.Entry>> entry : subjectGroups.entrySet()) { + String subject = entry.getKey(); + List> subjectVectors = entry.getValue(); + + if (subjectVectors.size() >= 3) { + insights.add(LearningInsightVO.builder() + .type("weakness") + .title(subject + "学科薄弱") + .description(String.format("在%s学科中错误%d道题目,建议加强练习", subject, subjectVectors.size())) + .confidenceScore(Math.min(0.9, 0.5 + subjectVectors.size() * 0.1)) + .relatedQuestions(subjectVectors.stream() + .map(v -> v.get("questionId").toString()) + .collect(Collectors.toList())) + .createdAt(LocalDateTime.now()) + .isActive(true) + .build()); + } + } + + return insights; + } + + private List generateRecommendations(List> vectors) { + List recommendations = new ArrayList<>(); + + if (vectors.isEmpty()) { + recommendations.add(LearningInsightVO.builder() + .type("recommendation") + .title("开始学习之旅") + .description("还没有学习记录,建议开始上传题目进行学习") + .confidenceScore(1.0) + .createdAt(LocalDateTime.now()) + .isActive(true) + .build()); + return recommendations; + } + + // 分析学习频率等逻辑... + return recommendations; + } + + private List generateLearningInsights(List> vectors) { + List insights = new ArrayList<>(); + // 生成学习洞察的逻辑... + return insights; + } +} \ No newline at end of file diff --git a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/dao/vector/IVectorDao.java b/refine-infrastructure/src/main/java/com/achobeta/infrastructure/dao/vector/IVectorDao.java deleted file mode 100644 index 095f764..0000000 --- a/refine-infrastructure/src/main/java/com/achobeta/infrastructure/dao/vector/IVectorDao.java +++ /dev/null @@ -1,73 +0,0 @@ -package com.achobeta.infrastructure.dao.vector; - -import com.achobeta.infrastructure.dao.po.LearningVector; -import com.achobeta.domain.rag.model.valobj.LearningInsightVO; -import org.apache.ibatis.annotations.Mapper; -import org.apache.ibatis.annotations.Param; - -import java.util.List; -import java.util.Map; - -/** - * @Auth : Malog - * @Desc : 向量数据访问接口 - * @Time : 2025/11/10 - */ -@Mapper -public interface IVectorDao { - - /** - * 插入学习向量 - */ - int insertLearningVector(LearningVector vector); - - /** - * 向量相似度搜索 - */ - List searchSimilarVectors(@Param("userId") String userId, - @Param("queryVector") float[] queryVector, - @Param("limit") int limit); - - /** - * 根据行为类型获取用户向量 - */ - List getUserVectorsByActionType(@Param("userId") String userId, - @Param("actionType") String actionType); - - /** - * 获取用户所有向量 - */ - List getUserAllVectors(@Param("userId") String userId); - - /** - * 保存学习洞察 - */ - int saveLearningInsight(@Param("userId") String userId, - @Param("insight") LearningInsightVO insight); - - /** - * 获取用户学习洞察 - */ - List getUserLearningInsights(@Param("userId") String userId, - @Param("insightType") String insightType); - - /** - * 获取用户最近N天的学习向量数据 - */ - List getUserRecentLearningData(@Param("userId") String userId, @Param("days") int days); - - /** - * 获取用户最近N天的学习统计数据 - */ - Map getUserLearningStatistics(@Param("userId") String userId, @Param("days") int days); - - /** - * 获取用户最近N天按科目分组的学习数据 - */ - List> getUserLearningDataBySubject(@Param("userId") String userId, @Param("days") int days); - - /** - * 获取用户最近N天按行为类型分组的学习数据 - */ - List> getUserLearningDataByActionType(@Param("userId") String userId, @Param("days") int days); -} \ No newline at end of file diff --git a/refine-trigger/src/main/java/com/achobeta/trigger/event/UserBehaviorEventListener.java b/refine-trigger/src/main/java/com/achobeta/trigger/event/UserBehaviorEventListener.java new file mode 100644 index 0000000..86a97e4 --- /dev/null +++ b/refine-trigger/src/main/java/com/achobeta/trigger/event/UserBehaviorEventListener.java @@ -0,0 +1,162 @@ +package com.achobeta.trigger.event; + +import com.achobeta.domain.rag.service.IVectorService; +import com.achobeta.domain.user.event.UserBehaviorEvent; +import com.achobeta.domain.user.event.UserMistakeEvent; +import com.achobeta.domain.user.event.UserReviewEvent; +import com.achobeta.domain.user.event.UserUploadEvent; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.event.EventListener; +import org.springframework.scheduling.annotation.Async; +import org.springframework.stereotype.Component; + +/** + * @Auth : Malog + * @Desc : 用户行为事件监听器 + * @Time : 2025/11/28 + */ +@Slf4j +@Component +public class UserBehaviorEventListener { + + @Autowired + private IVectorService vectorService; + + /** + * 处理用户复习事件 + */ + @EventListener + @Async + public void handleUserReviewEvent(UserReviewEvent event) { + try { + log.info("检测到用户复习事件,开始记录向量数据,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + + boolean success = vectorService.storeLearningVector( + event.getUserId(), + event.getQuestionId(), + event.getQuestionContent(), + event.getActionType(), + event.getSubject(), + event.getKnowledgePointId() + ); + + if (success) { + log.info("用户复习行为向量记录成功,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + } else { + log.warn("用户复习行为向量记录失败,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + } + + } catch (Exception e) { + log.error("处理用户复习事件失败,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId(), e); + } + } + + /** + * 处理用户错题事件 + */ + @EventListener + @Async + public void handleUserMistakeEvent(UserMistakeEvent event) { + try { + log.info("检测到用户错题事件,开始记录向量数据,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + + boolean success = vectorService.storeLearningVector( + event.getUserId(), + event.getQuestionId(), + event.getQuestionContent(), + event.getActionType(), + event.getSubject(), + event.getKnowledgePointId() + ); + + if (success) { + log.info("用户错题行为向量记录成功,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + } else { + log.warn("用户错题行为向量记录失败,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + } + + } catch (Exception e) { + log.error("处理用户错题事件失败,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId(), e); + } + } + + /** + * 处理用户上传事件 + */ + @EventListener + @Async + public void handleUserUploadEvent(UserUploadEvent event) { + try { + log.info("检测到用户上传事件,开始记录向量数据,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + + boolean success = vectorService.storeLearningVector( + event.getUserId(), + event.getQuestionId(), + event.getQuestionContent(), + event.getActionType(), + event.getSubject(), + event.getKnowledgePointId() + ); + + if (success) { + log.info("用户上传行为向量记录成功,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + } else { + log.warn("用户上传行为向量记录失败,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId()); + } + + } catch (Exception e) { + log.error("处理用户上传事件失败,userId:{}, questionId:{}", + event.getUserId(), event.getQuestionId(), e); + } + } + + /** + * 处理通用用户行为事件 + */ + @EventListener + @Async + public void handleUserBehaviorEvent(UserBehaviorEvent event) { + try { + // 避免重复处理已经有专门监听器的事件 + if (event instanceof UserReviewEvent || event instanceof UserMistakeEvent || event instanceof UserUploadEvent) { + return; + } + + log.info("检测到用户行为事件,开始记录向量数据,userId:{}, actionType:{}, questionId:{}", + event.getUserId(), event.getActionType(), event.getQuestionId()); + + boolean success = vectorService.storeLearningVector( + event.getUserId(), + event.getQuestionId(), + event.getQuestionContent(), + event.getActionType(), + event.getSubject(), + event.getKnowledgePointId() + ); + + if (success) { + log.info("用户行为向量记录成功,userId:{}, actionType:{}, questionId:{}", + event.getUserId(), event.getActionType(), event.getQuestionId()); + } else { + log.warn("用户行为向量记录失败,userId:{}, actionType:{}, questionId:{}", + event.getUserId(), event.getActionType(), event.getQuestionId()); + } + + } catch (Exception e) { + log.error("处理用户行为事件失败,userId:{}, actionType:{}, questionId:{}", + event.getUserId(), event.getActionType(), event.getQuestionId(), e); + } + } +} \ No newline at end of file diff --git a/refine-trigger/src/main/java/com/achobeta/trigger/http/LearningAnalysisController.java b/refine-trigger/src/main/java/com/achobeta/trigger/http/LearningAnalysisController.java index 664aa1e..f52c315 100644 --- a/refine-trigger/src/main/java/com/achobeta/trigger/http/LearningAnalysisController.java +++ b/refine-trigger/src/main/java/com/achobeta/trigger/http/LearningAnalysisController.java @@ -13,6 +13,7 @@ import com.achobeta.types.common.UserContext; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.web.bind.annotation.*; import java.util.List; @@ -33,6 +34,7 @@ public class LearningAnalysisController { private LearningAnalysisService learningAnalysisService; @Autowired + @Qualifier("weaviateVectorRepository") private IVectorService vectorService; /**