From cfff3c62d5e7781d199b0a08880688ebc100b9de Mon Sep 17 00:00:00 2001 From: chenzhi Date: Fri, 11 Jul 2025 16:10:16 +0900 Subject: [PATCH 01/12] add rerank method --- dev-share-api/Controllers/ApiController.cs | 45 +++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index 14d24e1..810a47e 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -129,9 +129,18 @@ public async Task> Search([FromBody] SearchRequest request try { + //1.gen prompt embedding var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Text); var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Text); - var results = await _vectorService.SearchEmbeddingAsync(denseQueryVector: denseEmbedding, sparseIndices: indices, sparseValues: values, topK: request.TopRelatives); + + //2.search prompt and get result(content and comment) + var ContentResults; + var InsightResults; + //3. do rerank and get reranked list + var rerankResults = GetRerankedList(ContentResults,InsightResults); + //4. get finalResults from sql server by id + + return Ok(results); } catch (Exception ex) @@ -244,4 +253,38 @@ private async Task TryPlaywright(string url) var text = await page.EvalOnSelectorAllAsync("p", "els => els.map(e => e.innerText).filter(t => t.trim().length > 0)"); return string.Join("\n", text); } + + //todo make sure the return data from service is List and List + public List GetRerankedList(List contents, List comments) + { + // averge comment.score + var commentGroups = comments + .GroupBy(c => c.ContentId) + .ToDictionary( + g => g.Key, + g => g.Average(c => c.Score) + ); + + // content.score find table + var contentScores = contents + .ToDictionary(c => c.Id, c => c.Score); + + // union all contentId + var allContentIds = contentScores.Keys + .Union(commentGroups.Keys) + .Distinct(); + + var result = allContentIds + .Select(id => new Rerank + { + ContentId = id, + Score = + (contentScores.TryGetValue(id, out var cScore) ? cScore : 0) * 0.7 + + (commentGroups.TryGetValue(id, out var comAvg) ? comAvg : 0) * 0.3 + }) + .OrderByDescending(r => r.Score) + .ToList(); + + return result; + } } \ No newline at end of file From a516fa704a236ccae4f03fe05a70ae8dc1d1af9d Mon Sep 17 00:00:00 2001 From: chenzhi Date: Mon, 14 Jul 2025 20:35:27 +0900 Subject: [PATCH 02/12] add all rerank-api needed function without taking insightresults --- dev-share-api/Controllers/ApiController.cs | 24 +++++++++++++++++----- dev-share-api/Models/Rerank.cs | 7 +++++++ 2 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 dev-share-api/Models/Rerank.cs diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index 810a47e..4d428cd 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -22,12 +22,16 @@ public class ExtractController : ControllerBase private readonly IEmbeddingService _embeddingService; private readonly IVectorService _vectorService; private readonly ShareChainExecutor _shareChainExecutor; + private readonly IUserInsightService _userInsightService; + private readonly IResourceService _resourceService; private static readonly ConcurrentDictionary TaskStore = new(); public ExtractController( ISummaryService summaryService, IEmbeddingService embeddingService, IVectorService vectorService, + IUserInsightService _userInsightService; + IResourceService _resourceService; ShareChainExecutor shareChainExecutor) { _summaryService = summaryService; @@ -118,7 +122,7 @@ public IActionResult GetStatus(string taskId) } [HttpPost("search")] - public async Task> Search([FromBody] SearchRequest request) + public async Task> Search([FromBody] SearchRequest request) { if (string.IsNullOrWhiteSpace(request.Text)) { @@ -132,15 +136,25 @@ public async Task> Search([FromBody] SearchRequest request //1.gen prompt embedding var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Text); var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Text); - + //2.search prompt and get result(content and comment) - var ContentResults; + var ContentResults = await _vectorService.SearchEmbeddingAsync(denseQueryVector: denseEmbedding, sparseIndices: indices, sparseValues: values, topK: request.TopRelatives); + //todo var InsightResults; + //3. do rerank and get reranked list var rerankResults = GetRerankedList(ContentResults,InsightResults); - //4. get finalResults from sql server by id - + //4. get finalResults from sql server by id + var results = new List(); + foreach(var item in reranResults) + { + var contentId = item.ContentId; + var resource = await _resourceService.GetResourceById(contentId); + if(resource != null){ + results.Add(resource); + } + } return Ok(results); } catch (Exception ex) diff --git a/dev-share-api/Models/Rerank.cs b/dev-share-api/Models/Rerank.cs new file mode 100644 index 0000000..8ccc221 --- /dev/null +++ b/dev-share-api/Models/Rerank.cs @@ -0,0 +1,7 @@ +namespace Models; + +public class Rerank +{ + public int ContentId { get; set; } + public double Score { get; set; } +} \ No newline at end of file From efb3462c33ef90880655c24798257fa13ce1f725 Mon Sep 17 00:00:00 2001 From: chenzhi Date: Mon, 14 Jul 2025 21:25:24 +0900 Subject: [PATCH 03/12] rerank-api finished --- dev-share-api/Controllers/ApiController.cs | 101 +++++++----------- .../Handle/DatabaseShareChainHandle.cs | 19 +++- .../Handle/EmbeddingShareChainHandle.cs | 5 +- 3 files changed, 61 insertions(+), 64 deletions(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index ab235ce..e67d5c0 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -28,9 +28,9 @@ public ExtractController( ISummaryService summaryService, IEmbeddingService embeddingService, IVectorService vectorService, - IUserInsightService _userInsightService; - IResourceService _resourceService; - ShareChainExecutor shareChainExecutor) + IUserInsightService _userInsightService, + IResourceService _resourceService, + ShareChainExecutor shareChainExecutor, OnlineResearchService onlineResearchService) { _summaryService = summaryService; @@ -122,7 +122,7 @@ public IActionResult GetStatus(string taskId) } [HttpPost("search")] - public async Task> Search([FromBody] SearchRequest request) + public async Task> Search([FromBody] SearchRequest request) { if (string.IsNullOrWhiteSpace(request.Text)) { @@ -133,35 +133,48 @@ public async Task> Search([FromBody] SearchRequest request) try { - //1.gen prompt embedding - var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Text); - var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Text); - - //2.search prompt and get result(content and comment) - var ContentResults = await _vectorService.SearchEmbeddingAsync(denseQueryVector: denseEmbedding, sparseIndices: indices, sparseValues: values, topK: request.TopRelatives); - //todo - var InsightResults; - - //3. do rerank and get reranked list - var rerankResults = GetRerankedList(ContentResults,InsightResults); - - //4. get finalResults from sql server by id - var results = new List(); - foreach(var item in reranResults) + //get vectordb data results + var resourceResults = await _vectorService.SearchResourceAsync( + query: request.Text, + topK: request.TopRelatives); + + var insightResults = await _vectorService.SearchInsightAsync( + query: request.Text, + topK: request.TopRelatives); + + + if (resourceResults == null + || resourceResults.Count == 0 + || insightResults == null + || insightResults.Count == 0) { - var contentId = item.ContentId; - var resource = await _resourceService.GetResourceById(contentId); - if(resource != null){ - results.Add(resource); + // Fallback to online research + var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text); + return Ok(new { source = "online", result = onlineResult }); + }else{ + + //2. do rerank and get reranked list + var rerankResults = GetRerankedList(resourceResults,insightResults); + + //3. get finalResults from sql server by id + var results = new List(); + foreach(var item in reranResults) + { + var contentId = item.ContentId; + var resource = await _resourceService.GetResourceById(contentId); + if(resource != null){ + results.Add(resource); + } } + return Ok(new { source = "vector", result = resourceResults }); } - return Ok(results); } catch (Exception ex) { return StatusCode(500, "Search failed due to an internal error."); } } + [HttpPost("vector/init")] public async Task> InitVectorDB() @@ -201,43 +214,7 @@ public async Task ShareInsight([FromBody] ShareInsightRequest req return Ok(); } - [HttpPost("search")] - public async Task Search([FromBody] SearchRequest request) - { - if (string.IsNullOrWhiteSpace(request.Text)) - { - return BadRequest("Search text cannot be empty."); - } - if (request.TopRelatives <= 0 || request.TopRelatives > 100) - return BadRequest("TopRelatives must be between 1 and 100."); - - try - { - var resourceResults = await _vectorService.SearchResourceAsync( - query: request.Text, - topK: request.TopRelatives); - - var insightResults = await _vectorService.SearchInsightAsync( - query: request.Text, - topK: request.TopRelatives); - - if (resourceResults == null - || resourceResults.Count == 0 - || insightResults == null - || insightResults.Count == 0) - { - // Fallback to online research - var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text); - return Ok(new { source = "online", result = onlineResult }); - } - - return Ok(new { source = "vector", result = resourceResults }); - } - catch (Exception) - { - return StatusCode(500, "Search failed due to an internal error."); - } - } + private string? TryHtmlAgilityPack(string url) { @@ -298,7 +275,7 @@ private async Task TryPlaywright(string url) } //todo make sure the return data from service is List and List - public List GetRerankedList(List contents, List comments) + public List GetRerankedList(List contents, List comments) { // averge comment.score var commentGroups = comments diff --git a/dev-share-api/Handle/DatabaseShareChainHandle.cs b/dev-share-api/Handle/DatabaseShareChainHandle.cs index 7efef38..518b1a9 100644 --- a/dev-share-api/Handle/DatabaseShareChainHandle.cs +++ b/dev-share-api/Handle/DatabaseShareChainHandle.cs @@ -5,10 +5,14 @@ namespace Services; public class DatabaseShareChainHandle : BaseShareChainHandle { private readonly IVectorService _vectorService; + private readonly IUserInsightService _userInsightService; + private readonly IResourceService _resourceService; - public DatabaseShareChainHandle(IVectorService vectorService) + public DatabaseShareChainHandle(IVectorService vectorService, IUserInsightService userInsightService, IResourceService resourceService) { _vectorService = vectorService; + _userInsightService = userInsightService; + _resourceService = resourceService; } protected override void Validate(ResourceShareContext context) @@ -28,6 +32,13 @@ await _vectorService.UpsertResourceAsync( resourceId, context.Summary!, context.ResourceVectors!); + + await _resourceService.AddResourceAsync( + new ResourceDto{ + ResourceId = resourceId, + Content = context.Summary, + Url = context.Url + }); } await _vectorService.UpsertInsightAsync( @@ -36,6 +47,12 @@ await _vectorService.UpsertInsightAsync( context.Insight!, resourceId, context.InsightVectors!); + await _userInsightService.AddUserInsightAsync( + new UserInsightDto{ + ResourceId = resourceId, + Content = context.Insight + }); + return HandlerResult.Success(); } } \ No newline at end of file diff --git a/dev-share-api/Handle/EmbeddingShareChainHandle.cs b/dev-share-api/Handle/EmbeddingShareChainHandle.cs index 90ba0cb..07e2863 100644 --- a/dev-share-api/Handle/EmbeddingShareChainHandle.cs +++ b/dev-share-api/Handle/EmbeddingShareChainHandle.cs @@ -20,7 +20,10 @@ protected override void Validate(ResourceShareContext context) protected override async Task ProcessAsync(ResourceShareContext context) { - context.ResourceVectors = await GetVectors(context.Summary); + if(context.ExistingResource == null) + { + context.ResourceVectors = await GetVectors(context.Summary); + } if (!string.IsNullOrWhiteSpace(context.Insight)) { From ac9861c4be4a36d27ab17eff11bf7a3d4050ef0b Mon Sep 17 00:00:00 2001 From: Travis Date: Mon, 14 Jul 2025 22:38:09 +1000 Subject: [PATCH 04/12] Fix bugs and update methods --- dev-share-api/Controllers/ApiController.cs | 44 ++++++++++--------- .../Handle/DatabaseShareChainHandle.cs | 25 ++++++----- dev-share-api/Models/Rerank.cs | 2 +- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index e67d5c0..bcee99b 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -122,11 +122,11 @@ public IActionResult GetStatus(string taskId) } [HttpPost("search")] - public async Task> Search([FromBody] SearchRequest request) + public async Task Search([FromBody] SearchRequest request) { if (string.IsNullOrWhiteSpace(request.Text)) { - return BadRequest("Search text cannot be empty."); + return BadRequest(new { message = "Search text cannot be empty." }); } if (request.TopRelatives <= 0 || request.TopRelatives > 100) return BadRequest("TopRelatives must be between 1 and 100."); @@ -141,32 +141,34 @@ public async Task> Search([FromBody] SearchRequest request) var insightResults = await _vectorService.SearchInsightAsync( query: request.Text, topK: request.TopRelatives); - - if (resourceResults == null - || resourceResults.Count == 0 - || insightResults == null + + if (resourceResults == null + || resourceResults.Count == 0 + || insightResults == null || insightResults.Count == 0) { // Fallback to online research var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text); return Ok(new { source = "online", result = onlineResult }); - }else{ - + } + else + { //2. do rerank and get reranked list - var rerankResults = GetRerankedList(resourceResults,insightResults); + var rerankResults = GetRerankedList(resourceResults, insightResults); //3. get finalResults from sql server by id var results = new List(); - foreach(var item in reranResults) + foreach (var item in rerankResults) { var contentId = item.ContentId; - var resource = await _resourceService.GetResourceById(contentId); - if(resource != null){ + var resource = await _resourceService.GetResourceById(long.Parse(contentId)); + if (resource != null) + { results.Add(resource); } } - return Ok(new { source = "vector", result = resourceResults }); + return Ok(new { source = "vector", result = results }); } } catch (Exception ex) @@ -174,7 +176,7 @@ public async Task> Search([FromBody] SearchRequest request) return StatusCode(500, "Search failed due to an internal error."); } } - + [HttpPost("vector/init")] public async Task> InitVectorDB() @@ -214,7 +216,7 @@ public async Task ShareInsight([FromBody] ShareInsightRequest req return Ok(); } - + private string? TryHtmlAgilityPack(string url) { @@ -273,20 +275,20 @@ private async Task TryPlaywright(string url) var text = await page.EvalOnSelectorAllAsync("p", "els => els.map(e => e.innerText).filter(t => t.trim().length > 0)"); return string.Join("\n", text); } - + //todo make sure the return data from service is List and List - public List GetRerankedList(List contents, List comments) + private static List GetRerankedList(List resources, List insights) { // averge comment.score - var commentGroups = comments - .GroupBy(c => c.ContentId) + var commentGroups = insights + .GroupBy(c => c.ResourceId) .ToDictionary( g => g.Key, g => g.Average(c => c.Score) ); // content.score find table - var contentScores = contents + var contentScores = resources .ToDictionary(c => c.Id, c => c.Score); // union all contentId @@ -298,7 +300,7 @@ public List GetRerankedList(List contents, List new Rerank { ContentId = id, - Score = + Score = (contentScores.TryGetValue(id, out var cScore) ? cScore : 0) * 0.7 + (commentGroups.TryGetValue(id, out var comAvg) ? comAvg : 0) * 0.3 }) diff --git a/dev-share-api/Handle/DatabaseShareChainHandle.cs b/dev-share-api/Handle/DatabaseShareChainHandle.cs index 518b1a9..256efe7 100644 --- a/dev-share-api/Handle/DatabaseShareChainHandle.cs +++ b/dev-share-api/Handle/DatabaseShareChainHandle.cs @@ -23,36 +23,39 @@ protected override void Validate(ResourceShareContext context) protected override async Task ProcessAsync(ResourceShareContext context) { - var resourceId = IdGeneratorUtil.GetNextId().ToString(); + var resourceId = IdGeneratorUtil.GetNextId(); if (context.ExistingResource == null) { await _vectorService.UpsertResourceAsync( context.Url!, - resourceId, + resourceId.ToString(), context.Summary!, context.ResourceVectors!); - + await _resourceService.AddResourceAsync( - new ResourceDto{ - ResourceId = resourceId, - Content = context.Summary, - Url = context.Url - }); + new ResourceDto + { + ResourceId = resourceId, + Content = context.Summary, + Url = context.Url + }); } await _vectorService.UpsertInsightAsync( IdGeneratorUtil.GetNextId().ToString(), context.Url!, context.Insight!, - resourceId, + resourceId.ToString(), context.InsightVectors!); + await _userInsightService.AddUserInsightAsync( - new UserInsightDto{ + new UserInsightDto + { ResourceId = resourceId, Content = context.Insight }); - + return HandlerResult.Success(); } } \ No newline at end of file diff --git a/dev-share-api/Models/Rerank.cs b/dev-share-api/Models/Rerank.cs index 8ccc221..4776228 100644 --- a/dev-share-api/Models/Rerank.cs +++ b/dev-share-api/Models/Rerank.cs @@ -2,6 +2,6 @@ namespace Models; public class Rerank { - public int ContentId { get; set; } + public string ContentId { get; set; } public double Score { get; set; } } \ No newline at end of file From 731611e6de2965fe369957170d944822519c1c6a Mon Sep 17 00:00:00 2001 From: chenzhi Date: Mon, 14 Jul 2025 22:10:56 +0900 Subject: [PATCH 05/12] test version --- dev-share-api/Controllers/ApiController.cs | 6 +-- .../Factory/DevShareDbContextFactory.cs | 46 +++++++++---------- dev-share-api/Program.cs | 2 +- dev-share-api/Services/DependencyInjection.cs | 6 ++- .../Services/OnlineResearchService.cs | 13 +++--- dev-share-api/appsettings.json | 3 +- 6 files changed, 41 insertions(+), 35 deletions(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index bcee99b..7400c44 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -21,7 +21,7 @@ public class ExtractController : ControllerBase private readonly ShareChainExecutor _shareChainExecutor; private readonly IUserInsightService _userInsightService; private readonly IResourceService _resourceService; - private readonly OnlineResearchService _onlineResearchService; + private readonly IOnlineResearchService _onlineResearchService; private static readonly ConcurrentDictionary TaskStore = new(); public ExtractController( @@ -31,7 +31,7 @@ public ExtractController( IUserInsightService _userInsightService, IResourceService _resourceService, ShareChainExecutor shareChainExecutor, - OnlineResearchService onlineResearchService) + IOnlineResearchService onlineResearchService) { _summaryService = summaryService; _embeddingService = embeddingService; @@ -149,7 +149,7 @@ public async Task Search([FromBody] SearchRequest request) || insightResults.Count == 0) { // Fallback to online research - var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text); + var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text,3); return Ok(new { source = "online", result = onlineResult }); } else diff --git a/dev-share-api/Factory/DevShareDbContextFactory.cs b/dev-share-api/Factory/DevShareDbContextFactory.cs index 746c2ea..753c45d 100644 --- a/dev-share-api/Factory/DevShareDbContextFactory.cs +++ b/dev-share-api/Factory/DevShareDbContextFactory.cs @@ -1,28 +1,28 @@ -using Microsoft.EntityFrameworkCore; -using Microsoft.EntityFrameworkCore.Design; -using Data; +// using Microsoft.EntityFrameworkCore; +// using Microsoft.EntityFrameworkCore.Design; +// using Data; -public class DevShareDbContextFactory : IDesignTimeDbContextFactory -{ - public DevShareDbContext CreateDbContext(string[] args) - { - var basePath = Directory.GetCurrentDirectory(); +// public class DevShareDbContextFactory : IDesignTimeDbContextFactory +// { +// public DevShareDbContext CreateDbContext(string[] args) +// { +// var basePath = Directory.GetCurrentDirectory(); - var environment = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") ?? "Development"; +// var environment = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") ?? "Development"; - var configuration = new ConfigurationBuilder() - .SetBasePath(basePath) - .AddJsonFile("appsettings.json", optional: false) - .AddJsonFile($"appsettings.{environment}.json", optional: true) - .AddJsonFile("appsettings.local.json", optional: true) - .AddEnvironmentVariables() - .Build(); +// var configuration = new ConfigurationBuilder() +// .SetBasePath(basePath) +// .AddJsonFile("appsettings.json", optional: false) +// .AddJsonFile($"appsettings.{environment}.json", optional: true) +// .AddJsonFile("appsettings.local.json", optional: true) +// .AddEnvironmentVariables() +// .Build(); - var connectionString = configuration.GetConnectionString("DefaultConnection") - ?? throw new InvalidOperationException("Connection string 'DefaultConnection' not found."); +// var connectionString = configuration.GetConnectionString("DefaultConnection") +// ?? throw new InvalidOperationException("Connection string 'DefaultConnection' not found."); - var optionsBuilder = new DbContextOptionsBuilder(); - optionsBuilder.UseSqlServer(connectionString); - return new DevShareDbContext(optionsBuilder.Options); - } -} \ No newline at end of file +// var optionsBuilder = new DbContextOptionsBuilder(); +// optionsBuilder.UseSqlServer(connectionString); +// return new DevShareDbContext(optionsBuilder.Options); +// } +// } \ No newline at end of file diff --git a/dev-share-api/Program.cs b/dev-share-api/Program.cs index 18f818f..5c98812 100644 --- a/dev-share-api/Program.cs +++ b/dev-share-api/Program.cs @@ -9,7 +9,7 @@ // optional - if you don't want to have 'appsettings.local.json' for debugging purpose // Load secrets in development before building -if (builder.Environment.IsDevelopment()) builder.Configuration.AddUserSecrets(); +// if (builder.Environment.IsDevelopment()) builder.Configuration.AddUserSecrets(); // Service Registration builder.Services.AddControllers(); diff --git a/dev-share-api/Services/DependencyInjection.cs b/dev-share-api/Services/DependencyInjection.cs index 43fc583..8f6078a 100644 --- a/dev-share-api/Services/DependencyInjection.cs +++ b/dev-share-api/Services/DependencyInjection.cs @@ -40,7 +40,11 @@ public static IServiceCollection AddInfrastructureServices( // Database services.AddDbContext(options => - options.UseSqlServer(configuration.GetConnectionString("DefaultConnection"))); + { + options.UseSqlServer(configuration.GetConnectionString("DefaultConnection")); + Console.Write("database"); + }); + // HTTP Client services.AddHttpClient("FastEmbed", client => diff --git a/dev-share-api/Services/OnlineResearchService.cs b/dev-share-api/Services/OnlineResearchService.cs index bcfe7c4..504b753 100644 --- a/dev-share-api/Services/OnlineResearchService.cs +++ b/dev-share-api/Services/OnlineResearchService.cs @@ -15,7 +15,7 @@ public class OnlineResearchService : IOnlineResearchService { private readonly AzureOpenAIClient _client; private readonly string _deploymentName = "gpt-4o-mini"; // Set this to your deployment name - private readonly ILogger _logger; + // private readonly ILogger _logger; private static readonly JsonSerializerOptions _jsonOptions = new() { PropertyNameCaseInsensitive = true, @@ -23,11 +23,12 @@ public class OnlineResearchService : IOnlineResearchService }; public OnlineResearchService( - AzureOpenAIClient openAIClient, - ILogger logger) + AzureOpenAIClient openAIClient + // ILogger logger + ) { _client = openAIClient ?? throw new ArgumentNullException(nameof(openAIClient)); - _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + // _logger = logger ?? throw new ArgumentNullException(nameof(logger)); } public async Task> PerformOnlineResearchAsync(string query, int topK = 3) @@ -44,7 +45,7 @@ public async Task> PerformOnlineResearchAsync(str } catch (Exception ex) { - _logger.LogError(ex, "Error performing online research for query: {Query}", query); + // _logger.LogError(ex, "Error performing online research for query: {Query}", query); throw; } } @@ -86,7 +87,7 @@ private async Task> ParseResponseToVectorResource } catch (JsonException ex) { - _logger.LogWarning(ex, "Failed to parse OpenAI response: {Response}", response); + // _logger.LogWarning(ex, "Failed to parse OpenAI response: {Response}", response); return new[] { CreateFallbackDto(response) }; } } diff --git a/dev-share-api/appsettings.json b/dev-share-api/appsettings.json index d93d228..47a5573 100644 --- a/dev-share-api/appsettings.json +++ b/dev-share-api/appsettings.json @@ -1,6 +1,7 @@ { "ConnectionStrings": { - "DefaultConnection": "" + "DefaultConnection": "Server=tcp:sqldb-dev-share.database.windows.net,1433;Initial Catalog=sqldb-dev-share;Persist Security Info=False;User ID=devshare;Password=Df34@#fd;" + }, "Logging": { "LogLevel": { From 13e18dda37ab696905c576bb7921935c122e53a2 Mon Sep 17 00:00:00 2001 From: Xu-create-ops Date: Tue, 15 Jul 2025 15:30:33 +0800 Subject: [PATCH 06/12] fix code --- dev-share-api/Controllers/ApiController.cs | 101 +++--------------- dev-share-api/Executor/ShareChainExecutor.cs | 4 +- .../Factory/DevShareDbContextFactory.cs | 46 ++++---- .../Handle/DatabaseShareChainHandle.cs | 22 ++-- .../Handle/ExtractShareChainHandle.cs | 91 ++++++++++++++++ .../Handle/SummarizeShareChainHandle.cs | 9 +- dev-share-api/Models/ResourceShareContext.cs | 5 +- dev-share-api/Models/UrlRequest.cs | 2 +- dev-share-api/Services/DependencyInjection.cs | 1 + dev-share-api/Services/ResourceService.cs | 2 +- dev-share-api/dev-share-api.csproj | 1 - 11 files changed, 157 insertions(+), 127 deletions(-) create mode 100644 dev-share-api/Handle/ExtractShareChainHandle.cs diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index 7400c44..b2c9d2e 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -7,6 +7,7 @@ using System.Text; using Executor; using System.Collections.Concurrent; +using Newtonsoft.Json.Linq; namespace Controllers; @@ -15,29 +16,26 @@ namespace Controllers; [Route("api")] public class ExtractController : ControllerBase { - private readonly ISummaryService _summaryService; + private readonly IEmbeddingService _embeddingService; private readonly IVectorService _vectorService; - private readonly ShareChainExecutor _shareChainExecutor; - private readonly IUserInsightService _userInsightService; private readonly IResourceService _resourceService; private readonly IOnlineResearchService _onlineResearchService; + private readonly IServiceScopeFactory _scopeFactory; private static readonly ConcurrentDictionary TaskStore = new(); public ExtractController( - ISummaryService summaryService, IEmbeddingService embeddingService, IVectorService vectorService, - IUserInsightService _userInsightService, - IResourceService _resourceService, - ShareChainExecutor shareChainExecutor, - IOnlineResearchService onlineResearchService) + IOnlineResearchService onlineResearchService, + IServiceScopeFactory scopeFactory, + IResourceService resourceService) { - _summaryService = summaryService; _embeddingService = embeddingService; _vectorService = vectorService; - _shareChainExecutor = shareChainExecutor; _onlineResearchService = onlineResearchService; + _scopeFactory = scopeFactory; + _resourceService = resourceService; } [HttpPost("share")] @@ -71,26 +69,14 @@ public async Task Share([FromBody] UrlRequest request) _ = Task.Run(async () => { + using var scope = _scopeFactory.CreateScope(); + var executor = scope.ServiceProvider.GetRequiredService(); try { - Console.WriteLine($"Extracting: {url}"); - var result = TryHtmlAgilityPack(url); - if (string.IsNullOrWhiteSpace(result)) - result = await TryPlaywright(url); - if (string.IsNullOrWhiteSpace(result)) - throw new Exception("Content extraction failed."); - - var prompt = new StringBuilder() - .AppendLine("You will receive an input text and your task is to summarize the article in no more than 100 words.") - .AppendLine("Only return the summary. Do not include any explanation.") - .AppendLine("# Article content:") - .AppendLine($"{result}") - .ToString(); - - await _shareChainExecutor.ExecuteAsync(new ResourceShareContext + await executor.ExecuteAsync(new ResourceShareContext { Url = url, - Prompt = prompt + Insight = request.Comment }); task.Status = "success"; @@ -162,7 +148,8 @@ public async Task Search([FromBody] SearchRequest request) foreach (var item in rerankResults) { var contentId = item.ContentId; - var resource = await _resourceService.GetResourceById(long.Parse(contentId)); + var obj = JObject.Parse(contentId); + var resource = await _resourceService.GetResourceById(long.Parse(obj["num"].ToString())); if (resource != null) { results.Add(resource); @@ -216,66 +203,6 @@ public async Task ShareInsight([FromBody] ShareInsightRequest req return Ok(); } - - - private string? TryHtmlAgilityPack(string url) - { - try - { - var web = new HtmlWeb - { - // 设置 User-Agent,防止部分网站屏蔽爬虫 - UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + - "AppleWebKit/537.36 (KHTML, like Gecko) " + - "Chrome/120.0.0.0 Safari/537.36" - }; - var doc = web.Load(url); - - //TODO 编码问题 - // using var client = new HttpClient(); - // var bytes = client.GetByteArrayAsync(url).Result; - // var html = System.Text.Encoding.UTF8.GetString(bytes); - // var doc = new HtmlDocument(); - // doc.LoadHtml(html); - - - // 提取网页标题 - var titleNode = doc.DocumentNode.SelectSingleNode("//title"); - Console.WriteLine("Title: " + titleNode?.InnerText); - - // 提取所有段落文本 - var paragraphs = doc.DocumentNode.SelectNodes("//p"); - if (paragraphs == null) return null; - - var title = titleNode?.InnerText.Trim() ?? ""; - var paragraphText = string.Join("\n", paragraphs - .Select(p => p.InnerText.Trim()) - .Where(t => !string.IsNullOrWhiteSpace(t))); - - return title + "\n\n" + paragraphText; - } - catch - { - return null; - } - } - - // 使用 Playwright 模拟浏览器加载网页并提取段落内容(用于 CSR 页面) - private async Task TryPlaywright(string url) - { - // 启动 Playwright 浏览器(无头模式) - using var playwright = await Playwright.CreateAsync(); - await using var browser = await playwright.Chromium.LaunchAsync(new() { Headless = true }); - - // 打开新页面并导航到目标地址,等待网络空闲(页面渲染完成) - var page = await browser.NewPageAsync(); - await page.GotoAsync(url, new PageGotoOptions { WaitUntil = WaitUntilState.NetworkIdle }); - - // 提取所有

元素的 innerText,去除空行 - var text = await page.EvalOnSelectorAllAsync("p", "els => els.map(e => e.innerText).filter(t => t.trim().length > 0)"); - return string.Join("\n", text); - } - //todo make sure the return data from service is List and List private static List GetRerankedList(List resources, List insights) { diff --git a/dev-share-api/Executor/ShareChainExecutor.cs b/dev-share-api/Executor/ShareChainExecutor.cs index fc32d11..7ad3fc2 100644 --- a/dev-share-api/Executor/ShareChainExecutor.cs +++ b/dev-share-api/Executor/ShareChainExecutor.cs @@ -17,7 +17,7 @@ public ShareChainExecutor(IEnumerable handlers, IResourceServ public async Task ExecuteAsync(ResourceShareContext context) { - preHandle(context); + await preHandle(context); foreach (var handler in _handlers) { // Check if the handler should be skipped @@ -30,7 +30,7 @@ public async Task ExecuteAsync(ResourceShareContext context) } } - private async void preHandle(ResourceShareContext context) + private async Task preHandle(ResourceShareContext context) { ResourceDto resourceDto = await _resourceService.GetResourceByUrl(UrlManageUtil.NormalizeUrl(context.Url)); if (resourceDto != null) diff --git a/dev-share-api/Factory/DevShareDbContextFactory.cs b/dev-share-api/Factory/DevShareDbContextFactory.cs index 753c45d..746c2ea 100644 --- a/dev-share-api/Factory/DevShareDbContextFactory.cs +++ b/dev-share-api/Factory/DevShareDbContextFactory.cs @@ -1,28 +1,28 @@ -// using Microsoft.EntityFrameworkCore; -// using Microsoft.EntityFrameworkCore.Design; -// using Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Design; +using Data; -// public class DevShareDbContextFactory : IDesignTimeDbContextFactory -// { -// public DevShareDbContext CreateDbContext(string[] args) -// { -// var basePath = Directory.GetCurrentDirectory(); +public class DevShareDbContextFactory : IDesignTimeDbContextFactory +{ + public DevShareDbContext CreateDbContext(string[] args) + { + var basePath = Directory.GetCurrentDirectory(); -// var environment = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") ?? "Development"; + var environment = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") ?? "Development"; -// var configuration = new ConfigurationBuilder() -// .SetBasePath(basePath) -// .AddJsonFile("appsettings.json", optional: false) -// .AddJsonFile($"appsettings.{environment}.json", optional: true) -// .AddJsonFile("appsettings.local.json", optional: true) -// .AddEnvironmentVariables() -// .Build(); + var configuration = new ConfigurationBuilder() + .SetBasePath(basePath) + .AddJsonFile("appsettings.json", optional: false) + .AddJsonFile($"appsettings.{environment}.json", optional: true) + .AddJsonFile("appsettings.local.json", optional: true) + .AddEnvironmentVariables() + .Build(); -// var connectionString = configuration.GetConnectionString("DefaultConnection") -// ?? throw new InvalidOperationException("Connection string 'DefaultConnection' not found."); + var connectionString = configuration.GetConnectionString("DefaultConnection") + ?? throw new InvalidOperationException("Connection string 'DefaultConnection' not found."); -// var optionsBuilder = new DbContextOptionsBuilder(); -// optionsBuilder.UseSqlServer(connectionString); -// return new DevShareDbContext(optionsBuilder.Options); -// } -// } \ No newline at end of file + var optionsBuilder = new DbContextOptionsBuilder(); + optionsBuilder.UseSqlServer(connectionString); + return new DevShareDbContext(optionsBuilder.Options); + } +} \ No newline at end of file diff --git a/dev-share-api/Handle/DatabaseShareChainHandle.cs b/dev-share-api/Handle/DatabaseShareChainHandle.cs index 256efe7..49c3fd7 100644 --- a/dev-share-api/Handle/DatabaseShareChainHandle.cs +++ b/dev-share-api/Handle/DatabaseShareChainHandle.cs @@ -23,16 +23,12 @@ protected override void Validate(ResourceShareContext context) protected override async Task ProcessAsync(ResourceShareContext context) { - var resourceId = IdGeneratorUtil.GetNextId(); + + var resourceId = 0L; if (context.ExistingResource == null) { - await _vectorService.UpsertResourceAsync( - context.Url!, - resourceId.ToString(), - context.Summary!, - context.ResourceVectors!); - + resourceId = IdGeneratorUtil.GetNextId(); await _resourceService.AddResourceAsync( new ResourceDto { @@ -40,8 +36,17 @@ await _resourceService.AddResourceAsync( Content = context.Summary, Url = context.Url }); + await _vectorService.UpsertResourceAsync( + resourceId.ToString(), + context.Url!, + context.Summary!, + context.ResourceVectors!); } - + else + { + resourceId = context.ExistingResource.ResourceId; + } + await _vectorService.UpsertInsightAsync( IdGeneratorUtil.GetNextId().ToString(), context.Url!, @@ -57,5 +62,6 @@ await _userInsightService.AddUserInsightAsync( }); return HandlerResult.Success(); + } } \ No newline at end of file diff --git a/dev-share-api/Handle/ExtractShareChainHandle.cs b/dev-share-api/Handle/ExtractShareChainHandle.cs new file mode 100644 index 0000000..3069c59 --- /dev/null +++ b/dev-share-api/Handle/ExtractShareChainHandle.cs @@ -0,0 +1,91 @@ +using Models; +using HtmlAgilityPack; +using Microsoft.Playwright; + +namespace Services; + +public class ExtractShareChainHandle : BaseShareChainHandle +{ + + protected override void Validate(ResourceShareContext context) + { + // if (context.ExistingResource == null && string.IsNullOrWhiteSpace(context.Prompt)) + // throw new ArgumentNullException(nameof(context.Prompt), "Prompt cannot be null or empty."); + } + + public override async Task IsSkip(ResourceShareContext context) + { + return context.ExistingResource != null; + } + + protected override async Task ProcessAsync(ResourceShareContext context) + { + var url = context.Url; + Console.WriteLine($"Extracting: {url}"); + var result = TryHtmlAgilityPack(url); + if (string.IsNullOrWhiteSpace(result)) + result = await TryPlaywright(url); + if (string.IsNullOrWhiteSpace(result)) + throw new Exception("Content extraction failed."); + context.ExtractResult = result; + return HandlerResult.Success(); + } + + private string? TryHtmlAgilityPack(string url) + { + try + { + var web = new HtmlWeb + { + // 设置 User-Agent,防止部分网站屏蔽爬虫 + UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + + "AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/120.0.0.0 Safari/537.36" + }; + var doc = web.Load(url); + + //TODO 编码问题 + // using var client = new HttpClient(); + // var bytes = client.GetByteArrayAsync(url).Result; + // var html = System.Text.Encoding.UTF8.GetString(bytes); + // var doc = new HtmlDocument(); + // doc.LoadHtml(html); + + + // 提取网页标题 + var titleNode = doc.DocumentNode.SelectSingleNode("//title"); + Console.WriteLine("Title: " + titleNode?.InnerText); + + // 提取所有段落文本 + var paragraphs = doc.DocumentNode.SelectNodes("//p"); + if (paragraphs == null) return null; + + var title = titleNode?.InnerText.Trim() ?? ""; + var paragraphText = string.Join("\n", paragraphs + .Select(p => p.InnerText.Trim()) + .Where(t => !string.IsNullOrWhiteSpace(t))); + + return title + "\n\n" + paragraphText; + } + catch + { + return null; + } + } + + // 使用 Playwright 模拟浏览器加载网页并提取段落内容(用于 CSR 页面) + private async Task TryPlaywright(string url) + { + // 启动 Playwright 浏览器(无头模式) + using var playwright = await Playwright.CreateAsync(); + await using var browser = await playwright.Chromium.LaunchAsync(new() { Headless = true }); + + // 打开新页面并导航到目标地址,等待网络空闲(页面渲染完成) + var page = await browser.NewPageAsync(); + await page.GotoAsync(url, new PageGotoOptions { WaitUntil = WaitUntilState.NetworkIdle }); + + // 提取所有

元素的 innerText,去除空行 + var text = await page.EvalOnSelectorAllAsync("p", "els => els.map(e => e.innerText).filter(t => t.trim().length > 0)"); + return string.Join("\n", text); + } +} \ No newline at end of file diff --git a/dev-share-api/Handle/SummarizeShareChainHandle.cs b/dev-share-api/Handle/SummarizeShareChainHandle.cs index a484b16..e2b5ec3 100644 --- a/dev-share-api/Handle/SummarizeShareChainHandle.cs +++ b/dev-share-api/Handle/SummarizeShareChainHandle.cs @@ -1,3 +1,4 @@ +using System.Text; using Models; namespace Services; @@ -25,7 +26,13 @@ public override async Task IsSkip(ResourceShareContext context) protected override async Task ProcessAsync(ResourceShareContext context) { - var summary = await _summaryService.SummarizeAsync(context.Prompt); + var prompt = new StringBuilder() + .AppendLine("You will receive an input text and your task is to summarize the article in no more than 100 words.") + .AppendLine("Only return the summary. Do not include any explanation.") + .AppendLine("# Article content:") + .AppendLine($"{context.ExtractResult}") + .ToString(); + var summary = await _summaryService.SummarizeAsync(prompt); context.Summary = summary; return HandlerResult.Success(); } diff --git a/dev-share-api/Models/ResourceShareContext.cs b/dev-share-api/Models/ResourceShareContext.cs index e4b4967..fc8c1de 100644 --- a/dev-share-api/Models/ResourceShareContext.cs +++ b/dev-share-api/Models/ResourceShareContext.cs @@ -9,8 +9,7 @@ public class ResourceShareContext public string? Summary { get; set; } public Dictionary? ResourceVectors { get; set; } public Dictionary? InsightVectors { get; set; } - public string? Prompt { get; set; } public ResourceDto? ExistingResource { get; set; } - - + public string? ExtractResult { get; set; } + } \ No newline at end of file diff --git a/dev-share-api/Models/UrlRequest.cs b/dev-share-api/Models/UrlRequest.cs index 3a376a5..964b574 100644 --- a/dev-share-api/Models/UrlRequest.cs +++ b/dev-share-api/Models/UrlRequest.cs @@ -4,5 +4,5 @@ public class UrlRequest { public string? Url { get; set; } - public string? Prompt { get; set; } + public string? Comment { get; set; } } \ No newline at end of file diff --git a/dev-share-api/Services/DependencyInjection.cs b/dev-share-api/Services/DependencyInjection.cs index 8f6078a..91b5b1f 100644 --- a/dev-share-api/Services/DependencyInjection.cs +++ b/dev-share-api/Services/DependencyInjection.cs @@ -59,6 +59,7 @@ public static IServiceCollection AddApplicationServices(this IServiceCollection { //Not allowed to alter the sort of the following code. services.AddScoped(); + services.AddScoped(); services.AddScoped(); services.AddScoped(); services.AddScoped(); diff --git a/dev-share-api/Services/ResourceService.cs b/dev-share-api/Services/ResourceService.cs index 5bafeda..2f1fb0c 100644 --- a/dev-share-api/Services/ResourceService.cs +++ b/dev-share-api/Services/ResourceService.cs @@ -29,7 +29,7 @@ public async Task AddResourceAsync(ResourceDto resourceDto) public async Task GetResourceByUrl(string normalizeUrl) { return await _dbContext.Resources - .Where(resource => resource.Url == normalizeUrl) + .Where(resource => resource.NormalizeUrl == normalizeUrl) .Select(resource => new ResourceDto { ResourceId = resource.ResourceId, diff --git a/dev-share-api/dev-share-api.csproj b/dev-share-api/dev-share-api.csproj index 15a2a86..2839a16 100644 --- a/dev-share-api/dev-share-api.csproj +++ b/dev-share-api/dev-share-api.csproj @@ -22,7 +22,6 @@ - From 773127e0e0b740217c1142076a6c68811ff047c6 Mon Sep 17 00:00:00 2001 From: Travis Date: Wed, 16 Jul 2025 00:16:06 +1000 Subject: [PATCH 07/12] fix: renaming & fix the issue with vector return response --- dev-share-api.Tests/TestHost.cs | 3 ++- dev-share-api/Controllers/ApiController.cs | 26 +++++++++---------- dev-share-api/Models/Rerank.cs | 2 +- .../Models/{ResourceDTO.cs => ResourceDto.cs} | 2 -- dev-share-api/Models/UrlRequest.cs | 2 +- .../{UserInsightDTO.cs => UserInsightDto.cs} | 0 .../Services/OnlineResearchService.cs | 1 - dev-share-api/Services/VectorService.cs | 7 +++-- 8 files changed, 20 insertions(+), 23 deletions(-) rename dev-share-api/Models/{ResourceDTO.cs => ResourceDto.cs} (94%) rename dev-share-api/Models/{UserInsightDTO.cs => UserInsightDto.cs} (100%) diff --git a/dev-share-api.Tests/TestHost.cs b/dev-share-api.Tests/TestHost.cs index 2df6d03..c3041a0 100644 --- a/dev-share-api.Tests/TestHost.cs +++ b/dev-share-api.Tests/TestHost.cs @@ -10,7 +10,8 @@ public static IServiceProvider BuildTestServiceProvider() .Build(); var services = new ServiceCollection(); - services.AddApplicationServices(config); + services.AddInfrastructureServices(config) + .AddApplicationServices(); return services.BuildServiceProvider(); } diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index b2c9d2e..a57de5e 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -76,7 +76,7 @@ public async Task Share([FromBody] UrlRequest request) await executor.ExecuteAsync(new ResourceShareContext { Url = url, - Insight = request.Comment + Insight = request.Insight }); task.Status = "success"; @@ -135,7 +135,7 @@ public async Task Search([FromBody] SearchRequest request) || insightResults.Count == 0) { // Fallback to online research - var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text,3); + var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text, 3); return Ok(new { source = "online", result = onlineResult }); } else @@ -147,8 +147,8 @@ public async Task Search([FromBody] SearchRequest request) var results = new List(); foreach (var item in rerankResults) { - var contentId = item.ContentId; - var obj = JObject.Parse(contentId); + var resourceId = item.ResourceId; + var obj = JObject.Parse(resourceId); var resource = await _resourceService.GetResourceById(long.Parse(obj["num"].ToString())); if (resource != null) { @@ -203,11 +203,11 @@ public async Task ShareInsight([FromBody] ShareInsightRequest req return Ok(); } - //todo make sure the return data from service is List and List + //todo make sure the return data from service is List and List private static List GetRerankedList(List resources, List insights) { // averge comment.score - var commentGroups = insights + var insightGroups = insights .GroupBy(c => c.ResourceId) .ToDictionary( g => g.Key, @@ -215,21 +215,21 @@ private static List GetRerankedList(List resources, L ); // content.score find table - var contentScores = resources + var resourceScores = resources .ToDictionary(c => c.Id, c => c.Score); // union all contentId - var allContentIds = contentScores.Keys - .Union(commentGroups.Keys) + var allResourceIds = resourceScores.Keys + .Union(insightGroups.Keys) .Distinct(); - var result = allContentIds + var result = allResourceIds .Select(id => new Rerank { - ContentId = id, + ResourceId = id, Score = - (contentScores.TryGetValue(id, out var cScore) ? cScore : 0) * 0.7 + - (commentGroups.TryGetValue(id, out var comAvg) ? comAvg : 0) * 0.3 + (resourceScores.TryGetValue(id, out var rScore) ? rScore : 0) * 0.7 + + (insightGroups.TryGetValue(id, out var iAvg) ? iAvg : 0) * 0.3 }) .OrderByDescending(r => r.Score) .ToList(); diff --git a/dev-share-api/Models/Rerank.cs b/dev-share-api/Models/Rerank.cs index 4776228..dd8bacb 100644 --- a/dev-share-api/Models/Rerank.cs +++ b/dev-share-api/Models/Rerank.cs @@ -2,6 +2,6 @@ namespace Models; public class Rerank { - public string ContentId { get; set; } + public string ResourceId { get; set; } public double Score { get; set; } } \ No newline at end of file diff --git a/dev-share-api/Models/ResourceDTO.cs b/dev-share-api/Models/ResourceDto.cs similarity index 94% rename from dev-share-api/Models/ResourceDTO.cs rename to dev-share-api/Models/ResourceDto.cs index 7ad49a6..0ba62a9 100644 --- a/dev-share-api/Models/ResourceDTO.cs +++ b/dev-share-api/Models/ResourceDto.cs @@ -1,5 +1,3 @@ -using Entities; - namespace Models; public class ResourceDto diff --git a/dev-share-api/Models/UrlRequest.cs b/dev-share-api/Models/UrlRequest.cs index 964b574..a1a2984 100644 --- a/dev-share-api/Models/UrlRequest.cs +++ b/dev-share-api/Models/UrlRequest.cs @@ -4,5 +4,5 @@ public class UrlRequest { public string? Url { get; set; } - public string? Comment { get; set; } + public string? Insight { get; set; } } \ No newline at end of file diff --git a/dev-share-api/Models/UserInsightDTO.cs b/dev-share-api/Models/UserInsightDto.cs similarity index 100% rename from dev-share-api/Models/UserInsightDTO.cs rename to dev-share-api/Models/UserInsightDto.cs diff --git a/dev-share-api/Services/OnlineResearchService.cs b/dev-share-api/Services/OnlineResearchService.cs index 504b753..27bc8b4 100644 --- a/dev-share-api/Services/OnlineResearchService.cs +++ b/dev-share-api/Services/OnlineResearchService.cs @@ -1,5 +1,4 @@ using Azure.AI.OpenAI; -using Microsoft.Extensions.Options; using System.Text.Json; using Models; using OpenAI.Chat; diff --git a/dev-share-api/Services/VectorService.cs b/dev-share-api/Services/VectorService.cs index 2a94716..a0c7c90 100644 --- a/dev-share-api/Services/VectorService.cs +++ b/dev-share-api/Services/VectorService.cs @@ -1,7 +1,6 @@ using Models; using Qdrant.Client; using Qdrant.Client.Grpc; -using static Qdrant.Client.Grpc.Conditions; namespace Services; @@ -160,7 +159,7 @@ public async Task> SearchResourceAsync(string query, int var payload = result.Payload; return new VectorResourceDto { - Id = result.Id.ToString(), + Id = result.Id.Num.ToString(), Url = payload.TryGetValue("url", out var urlVal) && urlVal.KindCase == Value.KindOneofCase.StringValue ? urlVal.StringValue : string.Empty, Content = payload.TryGetValue("content", out var contentVal) && contentVal.KindCase == Value.KindOneofCase.StringValue ? contentVal.StringValue : string.Empty, Score = result.Score @@ -182,7 +181,7 @@ public async Task> SearchInsightAsync(string query, int t var insightResults = await _client.QueryAsync( - collectionName: _resourceCollection, + collectionName: _insightCollection, prefetch: prefetch, query: Fusion.Rrf, limit: (ulong)topK, @@ -196,7 +195,7 @@ public async Task> SearchInsightAsync(string query, int t var payload = result.Payload; return new VectorInsightDto { - Id = result.Id.ToString(), + Id = result.Id.Num.ToString(), Url = payload.TryGetValue("url", out var urlVal) && urlVal.KindCase == Value.KindOneofCase.StringValue ? urlVal.StringValue : string.Empty, Content = payload.TryGetValue("content", out var contentVal) && contentVal.KindCase == Value.KindOneofCase.StringValue ? contentVal.StringValue : string.Empty, ResourceId = payload.TryGetValue("resourceId", out var resourceIdVal) && resourceIdVal.KindCase == Value.KindOneofCase.StringValue ? resourceIdVal.StringValue : string.Empty, From 6f07080052e5507ddb3dd81ed06cbbbc372872ab Mon Sep 17 00:00:00 2001 From: Travis Date: Wed, 16 Jul 2025 00:27:33 +1000 Subject: [PATCH 08/12] fix: fix the parsing for onlinesearch service --- dev-share-api/Controllers/ApiController.cs | 2 +- .../Services/OnlineResearchService.cs | 23 +++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index a57de5e..c24d275 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -128,7 +128,7 @@ public async Task Search([FromBody] SearchRequest request) query: request.Text, topK: request.TopRelatives); - + resourceResults = null; if (resourceResults == null || resourceResults.Count == 0 || insightResults == null diff --git a/dev-share-api/Services/OnlineResearchService.cs b/dev-share-api/Services/OnlineResearchService.cs index 27bc8b4..6184f76 100644 --- a/dev-share-api/Services/OnlineResearchService.cs +++ b/dev-share-api/Services/OnlineResearchService.cs @@ -13,21 +13,16 @@ public interface IOnlineResearchService public class OnlineResearchService : IOnlineResearchService { private readonly AzureOpenAIClient _client; - private readonly string _deploymentName = "gpt-4o-mini"; // Set this to your deployment name - // private readonly ILogger _logger; + private readonly string _deploymentName = "gpt-4o-mini"; private static readonly JsonSerializerOptions _jsonOptions = new() { PropertyNameCaseInsensitive = true, WriteIndented = true }; - public OnlineResearchService( - AzureOpenAIClient openAIClient - // ILogger logger - ) + public OnlineResearchService(AzureOpenAIClient openAIClient) { _client = openAIClient ?? throw new ArgumentNullException(nameof(openAIClient)); - // _logger = logger ?? throw new ArgumentNullException(nameof(logger)); } public async Task> PerformOnlineResearchAsync(string query, int topK = 3) @@ -44,7 +39,6 @@ public async Task> PerformOnlineResearchAsync(str } catch (Exception ex) { - // _logger.LogError(ex, "Error performing online research for query: {Query}", query); throw; } } @@ -67,9 +61,15 @@ private async Task> ParseResponseToVectorResource try { - // Try parsing as array first + // Clean the response by removing Markdown code block and escapes + var cleanedResponse = response + .Replace("```json", "") + .Replace("```", "") + .Replace("\\n", "") + .Trim(); + var results = await Task.Run(() => - JsonSerializer.Deserialize(response, _jsonOptions)); + JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); if (results?.Any() == true) { @@ -78,7 +78,7 @@ private async Task> ParseResponseToVectorResource // Try parsing as single object if array fails var singleResult = await Task.Run(() => - JsonSerializer.Deserialize(response, _jsonOptions)); + JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); return singleResult != null ? new[] { singleResult } @@ -86,7 +86,6 @@ private async Task> ParseResponseToVectorResource } catch (JsonException ex) { - // _logger.LogWarning(ex, "Failed to parse OpenAI response: {Response}", response); return new[] { CreateFallbackDto(response) }; } } From 8539acf4cc56c970990137af56662848b3bf7289 Mon Sep 17 00:00:00 2001 From: Travis Date: Thu, 17 Jul 2025 18:26:20 +1000 Subject: [PATCH 09/12] update: fix online search response parsing issue --- dev-share-api/Controllers/ApiController.cs | 5 ++-- dev-share-api/Models/ResourceDto.cs | 8 +++--- .../Services/OnlineResearchService.cs | 28 +++++++++---------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index c24d275..017933e 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -128,15 +128,14 @@ public async Task Search([FromBody] SearchRequest request) query: request.Text, topK: request.TopRelatives); - resourceResults = null; if (resourceResults == null || resourceResults.Count == 0 || insightResults == null || insightResults.Count == 0) { // Fallback to online research - var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text, 3); - return Ok(new { source = "online", result = onlineResult }); + var onlineResult = await _onlineResearchService.PerformOnlineResearchAsync(request.Text, request.TopRelatives); + return Ok(new { source = "online", result = onlineResult.ToList() }); } else { diff --git a/dev-share-api/Models/ResourceDto.cs b/dev-share-api/Models/ResourceDto.cs index 0ba62a9..8fecada 100644 --- a/dev-share-api/Models/ResourceDto.cs +++ b/dev-share-api/Models/ResourceDto.cs @@ -3,8 +3,8 @@ namespace Models; public class ResourceDto { public long ResourceId { get; set; } - public string Url { get; set; } - public string NormalizeUrl { get; set; } - public string Content { get; set; } - public List UserInsights { get; set; } + public required string Url { get; set; } + public string? NormalizeUrl { get; set; } + public required string Content { get; set; } + public List? UserInsights { get; set; } } \ No newline at end of file diff --git a/dev-share-api/Services/OnlineResearchService.cs b/dev-share-api/Services/OnlineResearchService.cs index 6184f76..310154d 100644 --- a/dev-share-api/Services/OnlineResearchService.cs +++ b/dev-share-api/Services/OnlineResearchService.cs @@ -7,7 +7,7 @@ namespace Services; public interface IOnlineResearchService { - Task> PerformOnlineResearchAsync(string query, int topK); + Task> PerformOnlineResearchAsync(string query, int topK); } public class OnlineResearchService : IOnlineResearchService @@ -25,7 +25,7 @@ public OnlineResearchService(AzureOpenAIClient openAIClient) _client = openAIClient ?? throw new ArgumentNullException(nameof(openAIClient)); } - public async Task> PerformOnlineResearchAsync(string query, int topK = 3) + public async Task> PerformOnlineResearchAsync(string query, int topK = 3) { if (string.IsNullOrWhiteSpace(query)) { @@ -52,7 +52,7 @@ private async Task GetOpenAIResponseAsync(string query, int topK) return response.Content?.FirstOrDefault()?.Text ?? string.Empty; } - private async Task> ParseResponseToVectorResourceDtos(string response) + private static async Task> ParseResponseToVectorResourceDtos(string response) { if (string.IsNullOrWhiteSpace(response)) { @@ -66,10 +66,11 @@ private async Task> ParseResponseToVectorResource .Replace("```json", "") .Replace("```", "") .Replace("\\n", "") + .Replace("\n", "") .Trim(); var results = await Task.Run(() => - JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); + JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); if (results?.Any() == true) { @@ -78,7 +79,7 @@ private async Task> ParseResponseToVectorResource // Try parsing as single object if array fails var singleResult = await Task.Run(() => - JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); + JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); return singleResult != null ? new[] { singleResult } @@ -97,15 +98,11 @@ private static string GeneratePrompt(string query, int topK) [ {{ - ""Id"": ""unique-id-123"", ""Content"": ""First concise, factual answer here."", - ""Score"": 0.95, ""Url"": ""https://relevant-source-1.com"" }}, {{ - ""Id"": ""unique-id-456"", ""Content"": ""Second concise, factual answer here."", - ""Score"": 0.85, ""Url"": ""https://relevant-source-2.com"" }} ] @@ -115,11 +112,12 @@ private static string GeneratePrompt(string query, int topK) Return exactly {topK} JSON objects in an array. Ensure each answer is unique and relevant."; } - private static VectorResourceDto CreateFallbackDto(string content) => new() + private static ResourceDto CreateFallbackDto(string fallBackContent) { - Id = IdGeneratorUtil.GetNextId().ToString(), - Content = content, - Score = 0, - Url = string.Empty - }; + return new() + { + Content = fallBackContent, + Url = string.Empty + }; + } } From 6c1eb416e7f53d968b3c90f80fd72e5663cc29b6 Mon Sep 17 00:00:00 2001 From: Xu-create-ops Date: Thu, 17 Jul 2025 20:12:18 +0800 Subject: [PATCH 10/12] fix search resource --- dev-share-api/Controllers/ApiController.cs | 9 +++++++-- dev-share-ui/services/search-service.tsx | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index 017933e..3c6ce21 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -7,6 +7,7 @@ using System.Text; using Executor; using System.Collections.Concurrent; +using System.Text.Json; using Newtonsoft.Json.Linq; @@ -141,14 +142,18 @@ public async Task Search([FromBody] SearchRequest request) { //2. do rerank and get reranked list var rerankResults = GetRerankedList(resourceResults, insightResults); + + Console.WriteLine(JsonSerializer.Serialize(resourceResults, new JsonSerializerOptions + { + WriteIndented = true // 美化格式 + })); //3. get finalResults from sql server by id var results = new List(); foreach (var item in rerankResults) { var resourceId = item.ResourceId; - var obj = JObject.Parse(resourceId); - var resource = await _resourceService.GetResourceById(long.Parse(obj["num"].ToString())); + var resource = await _resourceService.GetResourceById(long.Parse(resourceId)); if (resource != null) { results.Add(resource); diff --git a/dev-share-ui/services/search-service.tsx b/dev-share-ui/services/search-service.tsx index 4dc6b4d..28cd704 100644 --- a/dev-share-ui/services/search-service.tsx +++ b/dev-share-ui/services/search-service.tsx @@ -14,7 +14,8 @@ export async function searchResources(query: string): Promise { if (!result.ok) throw new Error(`Search failed (${result.status})`); - const dtos: VectorSearchResultDTO[] = await result.json(); + const responseJson = await result.json(); + const dtos: VectorSearchResultDTO[] = responseJson.result; return dtos.map(dto => ({ id: crypto.randomUUID(), From 6fdbf55525a6d10154f246a5fe6cd4278ae0bd8b Mon Sep 17 00:00:00 2001 From: Xu-create-ops Date: Thu, 17 Jul 2025 21:12:52 +0800 Subject: [PATCH 11/12] fix code --- dev-share-api/Controllers/ApiController.cs | 5 ----- dev-share-api/Program.cs | 2 +- dev-share-api/Services/DependencyInjection.cs | 5 +---- dev-share-api/Services/VectorService.cs | 2 +- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index 3c6ce21..101d5b3 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -142,11 +142,6 @@ public async Task Search([FromBody] SearchRequest request) { //2. do rerank and get reranked list var rerankResults = GetRerankedList(resourceResults, insightResults); - - Console.WriteLine(JsonSerializer.Serialize(resourceResults, new JsonSerializerOptions - { - WriteIndented = true // 美化格式 - })); //3. get finalResults from sql server by id var results = new List(); diff --git a/dev-share-api/Program.cs b/dev-share-api/Program.cs index 5c98812..18f818f 100644 --- a/dev-share-api/Program.cs +++ b/dev-share-api/Program.cs @@ -9,7 +9,7 @@ // optional - if you don't want to have 'appsettings.local.json' for debugging purpose // Load secrets in development before building -// if (builder.Environment.IsDevelopment()) builder.Configuration.AddUserSecrets(); +if (builder.Environment.IsDevelopment()) builder.Configuration.AddUserSecrets(); // Service Registration builder.Services.AddControllers(); diff --git a/dev-share-api/Services/DependencyInjection.cs b/dev-share-api/Services/DependencyInjection.cs index 91b5b1f..aa32bf7 100644 --- a/dev-share-api/Services/DependencyInjection.cs +++ b/dev-share-api/Services/DependencyInjection.cs @@ -40,10 +40,7 @@ public static IServiceCollection AddInfrastructureServices( // Database services.AddDbContext(options => - { - options.UseSqlServer(configuration.GetConnectionString("DefaultConnection")); - Console.Write("database"); - }); + options.UseSqlServer(configuration.GetConnectionString("DefaultConnection"))); // HTTP Client diff --git a/dev-share-api/Services/VectorService.cs b/dev-share-api/Services/VectorService.cs index a0c7c90..f17f4d4 100644 --- a/dev-share-api/Services/VectorService.cs +++ b/dev-share-api/Services/VectorService.cs @@ -185,7 +185,7 @@ public async Task> SearchInsightAsync(string query, int t prefetch: prefetch, query: Fusion.Rrf, limit: (ulong)topK, - scoreThreshold: (float)0.7, //todo: make this dynamic + scoreThreshold: (float)0.9, //todo: make this dynamic payloadSelector: true, vectorsSelector: false ); From 6381a3ba92f1f45660b45e8d5dc8fd71b7616037 Mon Sep 17 00:00:00 2001 From: Xu-create-ops Date: Thu, 17 Jul 2025 21:21:28 +0800 Subject: [PATCH 12/12] recover setting. --- dev-share-api/appsettings.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-share-api/appsettings.json b/dev-share-api/appsettings.json index 47a5573..0b38196 100644 --- a/dev-share-api/appsettings.json +++ b/dev-share-api/appsettings.json @@ -1,6 +1,6 @@ { "ConnectionStrings": { - "DefaultConnection": "Server=tcp:sqldb-dev-share.database.windows.net,1433;Initial Catalog=sqldb-dev-share;Persist Security Info=False;User ID=devshare;Password=Df34@#fd;" + "DefaultConnection": "" }, "Logging": {