Skip to content

Commit 32cdede

Browse files
committed
duplicate fixes.
1 parent ee9bfc1 commit 32cdede

File tree

1 file changed

+65
-2
lines changed

1 file changed

+65
-2
lines changed

dotnetv4/CloudWatchLogs/LargeQuery/Scenarios/LargeQueryWorkflow.cs

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,27 @@ public static async Task ExecuteLargeQuery()
353353
Console.WriteLine();
354354
Console.WriteLine($"Queries finished in {stopwatch.Elapsed.TotalSeconds:F3} seconds.");
355355
Console.WriteLine($"Total logs found: {allResults.Count}");
356+
357+
// Check for duplicates
358+
Console.WriteLine();
359+
Console.WriteLine("Checking for duplicate logs...");
360+
var duplicates = FindDuplicateLogs(allResults);
361+
if (duplicates.Count > 0)
362+
{
363+
Console.WriteLine($"WARNING: Found {duplicates.Count} duplicate log entries!");
364+
Console.WriteLine("Duplicate entries (showing first 10):");
365+
foreach (var dup in duplicates.Take(10))
366+
{
367+
Console.WriteLine($" [{dup.Timestamp}] {dup.Message} (appears {dup.Count} times)");
368+
}
369+
370+
var uniqueCount = allResults.Count - duplicates.Sum(d => d.Count - 1);
371+
Console.WriteLine($"Unique logs: {uniqueCount}");
372+
}
373+
else
374+
{
375+
Console.WriteLine("No duplicates found. All logs are unique.");
376+
}
356377
Console.WriteLine();
357378

358379
var viewSample = !_interactive || GetYesNoResponse("Would you like to see a sample of the logs? (y/n) ");
@@ -420,8 +441,14 @@ private static async Task<List<List<ResultField>>> PerformLargeQuery(
420441
var offsetLastLogDate = lastLogDate.AddMilliseconds(1);
421442
Console.WriteLine($" -> Offset timestamp (last + 1ms): {offsetLastLogDate:yyyy-MM-ddTHH:mm:ss.fffZ} ({offsetLastLogDate.ToUnixTimeSeconds()}s)");
422443

423-
// Convert back to seconds for the API
444+
// Convert to seconds, but round UP to the next second to avoid overlapping with logs in the same second
445+
// This ensures we don't re-query logs that share the same second as the last log
424446
var offsetLastLogTime = offsetLastLogDate.ToUnixTimeSeconds();
447+
if (offsetLastLogDate.Millisecond > 0)
448+
{
449+
offsetLastLogTime++; // Move to the next full second
450+
Console.WriteLine($" -> Adjusted to next full second: {offsetLastLogTime}s ({DateTimeOffset.FromUnixTimeSeconds(offsetLastLogTime):yyyy-MM-ddTHH:mm:ss.fffZ})");
451+
}
425452

426453
Console.WriteLine($" -> Comparing: offsetLastLogTime={offsetLastLogTime}s vs endTime={endTime}s");
427454
Console.WriteLine($" -> End time as date: {DateTimeOffset.FromUnixTimeSeconds(endTime):yyyy-MM-ddTHH:mm:ss.fffZ}");
@@ -486,11 +513,13 @@ private static async Task<List<List<ResultField>>> PerformLargeQuery(
486513

487514
/// <summary>
/// Divides the interval [startTime, endTime] into two consecutive sub-ranges
/// that do not share a boundary value.
/// </summary>
/// <param name="startTime">Start of the interval to split.</param>
/// <param name="endTime">End of the interval to split.</param>
/// <returns>
/// A tuple whose first range runs from <paramref name="startTime"/> to the midpoint
/// and whose second range runs from midpoint + 1 to <paramref name="endTime"/>.
/// </returns>
/// <remarks>
/// When <paramref name="startTime"/> equals <paramref name="endTime"/>, the second
/// range starts one past its own end (range2Start &gt; range2End).
/// </remarks>
private static (long range1Start, long range1End, long range2Start, long range2End) SplitDateRange(long startTime, long endTime)
{
    // Adding half of the span to the start (rather than averaging the two ends)
    // keeps the intermediate value within the original interval.
    var firstHalfEnd = startTime + (endTime - startTime) / 2;

    // The second half begins one past the midpoint so the midpoint value
    // is only ever covered by the first half.
    var secondHalfStart = firstHalfEnd + 1;

    return (range1Start: startTime, range1End: firstHalfEnd, range2Start: secondHalfStart, range2End: endTime);
}
495524

496525
/// <summary>
@@ -731,5 +760,39 @@ private static long PromptUserForLong(string prompt)
731760
}
732761
return 0;
733762
}
763+
764+
/// <summary>
/// Finds log entries that occur more than once, where two entries are considered
/// the same when both their "@timestamp" and "@message" field values match.
/// </summary>
/// <param name="logs">
/// The log rows to inspect. Each row is a list of fields; a row that lacks the
/// "@timestamp" or "@message" field is treated as having an empty string for it.
/// </param>
/// <returns>
/// One tuple per duplicated entry containing its timestamp, its full message and the
/// number of times it appears, ordered by descending count. Empty when every entry is unique.
/// </returns>
private static List<(string Timestamp, string Message, int Count)> FindDuplicateLogs(List<List<ResultField>> logs)
{
    // Key on a value tuple instead of a "timestamp|message" string: a joined string
    // has to be split again afterwards, which truncates any message that itself
    // contains the '|' separator.
    var occurrences = new Dictionary<(string Timestamp, string Message), int>();

    foreach (var log in logs)
    {
        var timestamp = log.Find(f => f.Field == "@timestamp")?.Value ?? "";
        var message = log.Find(f => f.Field == "@message")?.Value ?? "";
        var key = (Timestamp: timestamp, Message: message);

        // TryGetValue leaves 'seen' at 0 for a new key, so one lookup covers both cases.
        occurrences.TryGetValue(key, out var seen);
        occurrences[key] = seen + 1;
    }

    return occurrences
        .Where(kvp => kvp.Value > 1)
        .Select(kvp => (Timestamp: kvp.Key.Timestamp, Message: kvp.Key.Message, Count: kvp.Value))
        .OrderByDescending(x => x.Count)
        .ToList();
}
734797
}
735798
// snippet-end:[CloudWatchLogs.dotnetv4.LargeQueryWorkflow]

0 commit comments

Comments
 (0)