|
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.Globalization;
using Azure;
using Azure.AI.OpenAI;

namespace DataCleaning_Preprocessing.Service
{
    /// <summary>
    /// Sends website-traffic samples to an Azure OpenAI chat deployment, asks the model to
    /// clean them, and parses the textual reply back into <see cref="WebsiteTrafficData"/> items.
    /// </summary>
    internal class AzureOpenAIService
    {
        const string endpoint = "https://your_end_point.openai.azure.com";
        const string deploymentName = "GPT35Turbo";

        // Single source of truth for the timestamp layout. The prompt advertises this exact
        // layout to the model and the parser expects it back, so both must use the same value.
        const string timestampFormat = "yyyy-MM-dd-HH-m-ss";

        // Composite format for one sample line in the prompt: "<timestamp>: <visitors>".
        const string sampleLineFormat = "{0:" + timestampFormat + "}: {1}";

        readonly string key;

        /// <summary>
        /// Creates the service.
        /// </summary>
        /// <param name="key">API key used to authenticate against the Azure OpenAI endpoint.</param>
        public AzureOpenAIService(string key)
        {
            this.key = key;
        }

        /// <summary>
        /// Asks the chat deployment to clean <paramref name="rawData"/> and returns the parsed result.
        /// </summary>
        /// <param name="rawData">Samples to be cleaned; must not be <c>null</c>.</param>
        /// <returns>
        /// The samples parsed from the model's reply, or a fixed set of placeholder samples
        /// when the request or the handling of its response fails.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="rawData"/> is <c>null</c>.</exception>
        public async Task<ObservableCollection<WebsiteTrafficData>> GetCleanedData(ObservableCollection<WebsiteTrafficData> rawData)
        {
            if (rawData is null)
            {
                throw new ArgumentNullException(nameof(rawData));
            }

            var collection = new ObservableCollection<WebsiteTrafficData>();

            var chatCompletionsOptions = new ChatCompletionsOptions
            {
                DeploymentName = deploymentName,
                Temperature = 0.5f,
                MaxTokens = 800,
                NucleusSamplingFactor = 0.95f,
                FrequencyPenalty = 0,
                PresencePenalty = 0,
            };

            // Format with the invariant culture: the reply is parsed with the invariant culture,
            // so the prompt must not contain culture-specific decimal separators or calendars.
            var samples = rawData.Select(d => string.Format(CultureInfo.InvariantCulture, sampleLineFormat, d.DateTime, d.Visitors));

            var prompt =
                "Clean the following e-commerce website traffic data, resolve outliers and fill missing values:\n"
                + string.Join("\n", samples)
                + " and the output cleaned data should be in the " + timestampFormat + ":Value, not required explanations";
            chatCompletionsOptions.Messages.Add(new ChatRequestUserMessage(prompt));

            try
            {
                // The client is created inside the try block so that an invalid key or endpoint
                // also ends in the placeholder fallback instead of surfacing to the caller.
                var client = new OpenAIClient(new Uri(endpoint), new AzureKeyCredential(key));
                var response = await client.GetChatCompletionsAsync(chatCompletionsOptions);
                return GetCleanedData(response.Value.Choices[0].Message.Content, collection);
            }
            catch (Exception ex)
            {
                // Deliberate best-effort: keep the fallback, but leave a trace of what went wrong.
                Debug.WriteLine($"Azure OpenAI request failed, returning placeholder data: {ex}");
                return GetDummyData();
            }
        }

        /// <summary>
        /// Parses the model's reply, one <c>timestamp:value</c> pair per line, into
        /// <paramref name="collection"/>.
        /// </summary>
        /// <param name="json">Raw reply text; despite the name it is treated as plain lines, not JSON.</param>
        /// <param name="collection">Collection that receives the parsed samples.</param>
        /// <returns><paramref name="collection"/>, with every successfully parsed line appended.</returns>
        ObservableCollection<WebsiteTrafficData> GetCleanedData(string json, ObservableCollection<WebsiteTrafficData> collection)
        {
            if (string.IsNullOrEmpty(json))
            {
                return collection;
            }

            foreach (var line in json.Split('\n'))
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                // The timestamp layout contains no ':' so a well-formed line splits into exactly two parts.
                var parts = line.Split(':');
                if (parts.Length != 2)
                {
                    continue;
                }

                // Skip lines that merely look like data (e.g. a "Cleaned data:" heading) instead of
                // throwing and discarding every valid line of the reply.
                if (!DateTime.TryParseExact(parts[0].Trim(), timestampFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out var timestamp))
                {
                    Debug.WriteLine($"Skipping line with unparseable timestamp: {line}");
                    continue;
                }

                // Invariant culture: the value uses '.' as decimal separator regardless of the device locale.
                if (!double.TryParse(parts[1].Trim(), NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out var visitors))
                {
                    Debug.WriteLine($"Skipping line with unparseable value: {line}");
                    continue;
                }

                collection.Add(new WebsiteTrafficData { DateTime = timestamp, Visitors = visitors });
            }

            return collection;
        }

        /// <summary>
        /// Builds the fixed placeholder series (hourly samples for 2024-07-01) returned when the
        /// service call fails.
        /// </summary>
        /// <returns>A new collection holding 24 hard-coded samples.</returns>
        private static ObservableCollection<WebsiteTrafficData> GetDummyData()
        {
            return new ObservableCollection<WebsiteTrafficData>
            {
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 00, 00, 00), Visitors = 150 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 01, 00, 00), Visitors = 160 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 02, 00, 00), Visitors = 155 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 03, 00, 00), Visitors = 162 }, // Missing data
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 04, 00, 00), Visitors = 170 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 05, 00, 00), Visitors = 175 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 06, 00, 00), Visitors = 145 }, // Missing data
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 07, 00, 00), Visitors = 180 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 08, 00, 00), Visitors = 190 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 09, 00, 00), Visitors = 185 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 10, 00, 00), Visitors = 200 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 11, 00, 00), Visitors = 207 }, // Missing data
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 12, 00, 00), Visitors = 220 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 13, 00, 00), Visitors = 230 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 14, 00, 00), Visitors = 237 }, // Missing data
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 15, 00, 00), Visitors = 250 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 16, 00, 00), Visitors = 260 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 17, 00, 00), Visitors = 270 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 18, 00, 00), Visitors = 277 }, // Missing data
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 19, 00, 00), Visitors = 280 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 20, 00, 00), Visitors = 290 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 21, 00, 00), Visitors = 300 },
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 22, 00, 00), Visitors = 307 }, // Missing data
                new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 23, 00, 00), Visitors = 320 },
            };
        }
    }
}
0 commit comments