Skip to content

Commit 2253b8a

Browse files
Merge pull request #1 from SyncfusionExamples/Data_cleaning_preprocessing
Added the sample for data cleaning and preprocessing
2 parents 4009bf1 + b975a2b commit 2253b8a

File tree

12 files changed

+428
-2
lines changed

12 files changed

+428
-2
lines changed

DataCleaning/App.xaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Application x:Class="DataCleaning_Preprocessing.App"
2+
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
3+
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
4+
xmlns:local="clr-namespace:DataCleaning_Preprocessing"
5+
>
6+
<Application.Resources>
7+
8+
</Application.Resources>
9+
</Application>

DataCleaning/App.xaml.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
using DataCleaning_Preprocessing;
2+
using System.Configuration;
3+
using System.Data;
4+
using System.Windows;
5+
using Microsoft.Extensions.DependencyInjection;
6+
7+
namespace DataCleaning_Preprocessing
8+
{
9+
/// <summary>
/// Application class paired with App.xaml; opens the main window when the app starts.
/// </summary>
public partial class App : Application
{
    /// <summary>
    /// Runs the base startup handling, then creates a <see cref="MainWindow"/> and shows it.
    /// </summary>
    /// <param name="e">Startup arguments forwarded to the base implementation.</param>
    protected override void OnStartup(StartupEventArgs e)
    {
        base.OnStartup(e);

        var window = new MainWindow();
        window.Show();
    }
}
20+
}

DataCleaning/AssemblyInfo.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using System.Windows;
2+
3+
[assembly: ThemeInfo(
4+
ResourceDictionaryLocation.None, //where theme specific resource dictionaries are located
5+
//(used if a resource is not found in the page,
6+
// or application resource dictionaries)
7+
ResourceDictionaryLocation.SourceAssembly //where the generic resource dictionary is located
8+
//(used if a resource is not found in the page,
9+
// app, or any theme specific resource dictionaries)
10+
)]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>WinExe</OutputType>
5+
<TargetFramework>net8.0-windows</TargetFramework>
6+
<RootNamespace>DataCleaning_Preprocessing</RootNamespace>
7+
<Nullable>enable</Nullable>
8+
<ImplicitUsings>enable</ImplicitUsings>
9+
<UseWPF>true</UseWPF>
10+
</PropertyGroup>
11+
12+
<ItemGroup>
13+
<PackageReference Include="Azure.AI.OpenAI" Version="1.0.0-beta.12" />
14+
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="9.0.0-preview.6.24327.7" />
15+
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
16+
<PackageReference Include="Syncfusion.SfBusyIndicator.WPF" Version="*" />
17+
<PackageReference Include="Syncfusion.SfChart.WPF" Version="*" />
18+
</ItemGroup>
19+
20+
</Project>
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3+
<PropertyGroup />
4+
<ItemGroup>
5+
<ApplicationDefinition Update="App.xaml">
6+
<SubType>Designer</SubType>
7+
</ApplicationDefinition>
8+
</ItemGroup>
9+
<ItemGroup>
10+
<Page Update="MainWindow.xaml">
11+
<SubType>Designer</SubType>
12+
</Page>
13+
</ItemGroup>
14+
</Project>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio Version 17
4+
VisualStudioVersion = 17.10.34916.146
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DataCleaning&Preprocessing", "DataCleaning&Preprocessing.csproj", "{9E55B90B-5FF4-4CE9-AF93-8109414D4A6A}"
7+
EndProject
8+
Global
9+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
10+
Debug|Any CPU = Debug|Any CPU
11+
Release|Any CPU = Release|Any CPU
12+
EndGlobalSection
13+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
14+
{9E55B90B-5FF4-4CE9-AF93-8109414D4A6A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15+
{9E55B90B-5FF4-4CE9-AF93-8109414D4A6A}.Debug|Any CPU.Build.0 = Debug|Any CPU
16+
{9E55B90B-5FF4-4CE9-AF93-8109414D4A6A}.Release|Any CPU.ActiveCfg = Release|Any CPU
17+
{9E55B90B-5FF4-4CE9-AF93-8109414D4A6A}.Release|Any CPU.Build.0 = Release|Any CPU
18+
EndGlobalSection
19+
GlobalSection(SolutionProperties) = preSolution
20+
HideSolutionNode = FALSE
21+
EndGlobalSection
22+
GlobalSection(ExtensibilityGlobals) = postSolution
23+
SolutionGuid = {331E405D-7FB9-41D5-AE66-00FC8AC3BA8C}
24+
EndGlobalSection
25+
EndGlobal

DataCleaning/MainWindow.xaml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
<Window x:Class="DataCleaning_Preprocessing.MainWindow"
2+
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
3+
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
4+
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
5+
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
6+
xmlns:local="clr-namespace:DataCleaning_Preprocessing"
7+
xmlns:Notification="clr-namespace:Syncfusion.Windows.Controls.Notification;assembly=Syncfusion.SfBusyIndicator.WPF"
8+
xmlns:syncfusion="clr-namespace:Syncfusion.UI.Xaml.Charts;assembly=Syncfusion.SfChart.WPF"
9+
mc:Ignorable="d">
10+
11+
<Border Margin="20" Padding="7" BorderThickness="2" CornerRadius="10" BorderBrush="#b0b8bf">
12+
13+
<Grid>
14+
15+
<Grid.RowDefinitions>
16+
<RowDefinition Height="Auto"/>
17+
<RowDefinition Height="*"/>
18+
</Grid.RowDefinitions>
19+
20+
<Grid Grid.Row="0">
21+
22+
<Grid.ColumnDefinitions>
23+
<ColumnDefinition Width="13"/>
24+
<ColumnDefinition Width="*"/>
25+
</Grid.ColumnDefinitions>
26+
27+
<StackPanel Grid.RowSpan="2" Margin="0,8,0,6" Orientation="Vertical" Background="YellowGreen" />
28+
29+
<StackPanel Grid.Column="1" Margin="5,0,0,0" Orientation="Vertical">
30+
<Label Content="E-Commerce Website Traffic Data" FontSize="27" FontWeight="Bold" Foreground="#666666"/>
31+
<Label Content="Tracking the number of visitors to an e-commerce website every hour" Foreground="Gray" FontSize="23"/>
32+
</StackPanel>
33+
34+
</Grid>
35+
36+
<Grid Grid.Row="1" >
37+
38+
<syncfusion:SfChart Palette="Custom" Margin="5" >
39+
40+
<syncfusion:SfChart.ColorModel>
41+
<syncfusion:ChartColorModel>
42+
<syncfusion:ChartColorModel.CustomBrushes>
43+
<SolidColorBrush Color="#ffa600"/>
44+
<SolidColorBrush Color="#58508d"/>
45+
<SolidColorBrush Color="#ff208d"/>
46+
</syncfusion:ChartColorModel.CustomBrushes>
47+
</syncfusion:ChartColorModel>
48+
</syncfusion:SfChart.ColorModel>
49+
50+
<syncfusion:SfChart.PrimaryAxis>
51+
<syncfusion:DateTimeAxis LabelFormat="hh tt" ShowGridLines="False" EdgeLabelsDrawingMode="Shift">
52+
<syncfusion:DateTimeAxis.LabelStyle>
53+
<syncfusion:LabelStyle FontSize="12.8"/>
54+
</syncfusion:DateTimeAxis.LabelStyle>
55+
</syncfusion:DateTimeAxis>
56+
</syncfusion:SfChart.PrimaryAxis>
57+
58+
<syncfusion:SfChart.SecondaryAxis>
59+
<syncfusion:NumericalAxis ShowGridLines="False">
60+
61+
<syncfusion:NumericalAxis.LabelStyle>
62+
<syncfusion:LabelStyle FontSize="12.8"/>
63+
</syncfusion:NumericalAxis.LabelStyle>
64+
65+
</syncfusion:NumericalAxis>
66+
</syncfusion:SfChart.SecondaryAxis>
67+
68+
<syncfusion:FastLineSeries x:Name="CleanedDataSeries" ItemsSource="{Binding CleanedData}" XBindingPath="DateTime" YBindingPath="Visitors"/>
69+
70+
<syncfusion:FastLineSeries x:Name="RawDataSeries" ItemsSource="{Binding RawData}" XBindingPath="DateTime" YBindingPath="Visitors"/>
71+
72+
</syncfusion:SfChart>
73+
74+
<Notification:SfBusyIndicator x:Name="busy" IsBusy="{Binding IsBusy}"/>
75+
76+
</Grid>
77+
</Grid>
78+
79+
</Border>
80+
</Window>

DataCleaning/MainWindow.xaml.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using System.Reflection;
2+
using System.Text;
3+
using System.Windows;
4+
using System.Windows.Controls;
5+
using Microsoft.Extensions.DependencyInjection;
6+
7+
namespace DataCleaning_Preprocessing
8+
{
9+
/// <summary>
/// Code-behind for MainWindow.xaml: creates the view model, binds it as the
/// data context, and starts loading the cleaned data once the template is applied.
/// </summary>
public partial class MainWindow : Window
{
    private readonly ViewModel _viewModel;

    /// <summary>
    /// Builds the window's visual tree and attaches a new <see cref="ViewModel"/> as its DataContext.
    /// </summary>
    public MainWindow()
    {
        InitializeComponent();
        DataContext = _viewModel = new ViewModel();
    }

    /// <summary>
    /// Flags the view model as busy and starts the cleaned-data load via <see cref="Task.Run(Func{Task})"/>.
    /// The resulting task is intentionally not awaited (fire-and-forget).
    /// </summary>
    public override void OnApplyTemplate()
    {
        base.OnApplyTemplate();

        _viewModel.IsBusy = true;
        _ = Task.Run(async () => await _viewModel.LoadCleanedDataAsync());
    }
}
33+
}

DataCleaning/Model/Model.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
namespace DataCleaning_Preprocessing
2+
{
3+
/// <summary>
/// A single website-traffic sample: a timestamp and the visitor count for it.
/// </summary>
public class WebsiteTrafficData
{
    /// <summary>Timestamp of the sample.</summary>
    public DateTime DateTime { get; set; }

    /// <summary>Visitor count associated with <see cref="DateTime"/>.</summary>
    public double Visitors { get; set; }
}
9+
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
using Azure.AI.OpenAI;
2+
using Azure;
3+
using System.Collections.ObjectModel;
4+
using System.Globalization;
5+
6+
namespace DataCleaning_Preprocessing.Service
7+
{
8+
/// <summary>
/// Sends raw website-traffic samples to an Azure OpenAI chat deployment and parses the
/// model's text reply back into <see cref="WebsiteTrafficData"/> items. If the request
/// fails for any reason, a fixed built-in data set is returned instead.
/// </summary>
internal class AzureOpenAIService
{
    // Placeholder endpoint; must be replaced with a real Azure OpenAI resource URL.
    private const string Endpoint = "https://your_end_point.openai.azure.com";
    private const string DeploymentName = "GPT35Turbo";

    // Timestamp layout used both when writing the prompt rows and when parsing the reply.
    private const string TimestampFormat = "yyyy-MM-dd-HH-m-ss";

    private readonly string _key;

    /// <summary>Creates the service with the API key used to authenticate requests.</summary>
    /// <param name="key">Azure OpenAI API key.</param>
    public AzureOpenAIService(string key)
    {
        _key = key;
    }

    /// <summary>
    /// Asks the chat deployment to clean <paramref name="rawData"/> and returns the parsed result.
    /// </summary>
    /// <param name="rawData">Samples to clean; each is sent as a "timestamp: value" row.</param>
    /// <returns>
    /// The rows parsed from the model's reply, or the built-in fallback data when the
    /// request (or reading its response) throws.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="rawData"/> is null.</exception>
    public async Task<ObservableCollection<WebsiteTrafficData>> GetCleanedData(ObservableCollection<WebsiteTrafficData> rawData)
    {
        ArgumentNullException.ThrowIfNull(rawData);

        var chatCompletionsOptions = new ChatCompletionsOptions
        {
            DeploymentName = DeploymentName,
            Temperature = 0.5f,
            MaxTokens = 800,
            NucleusSamplingFactor = 0.95f,
            FrequencyPenalty = 0,
            PresencePenalty = 0,
        };

        // Format with the invariant culture so the text sent to the model (and expected
        // back from it) does not change with the machine's regional settings.
        var rows = rawData.Select(d =>
            $"{d.DateTime.ToString(TimestampFormat, CultureInfo.InvariantCulture)}: {d.Visitors.ToString(CultureInfo.InvariantCulture)}");

        var prompt = $"Clean the following e-commerce website traffic data, resolve outliers and fill missing values:\n{string.Join("\n", rows)} and the output cleaned data should be in the yyyy-MM-dd-HH-m-ss:Value, not required explanations";
        chatCompletionsOptions.Messages.Add(new ChatRequestUserMessage(prompt));

        try
        {
            var client = new OpenAIClient(new Uri(Endpoint), new AzureKeyCredential(_key));
            var response = await client.GetChatCompletionsAsync(chatCompletionsOptions);
            return ParseCleanedData(response.Value.Choices[0].Message.Content);
        }
        catch (Exception ex)
        {
            // Deliberate best-effort fallback: any failure yields the canned data set.
            // The exception is traced in debug builds so the fallback is not entirely silent.
            System.Diagnostics.Debug.WriteLine($"Azure OpenAI request failed; using fallback data. {ex}");
            return GetDummyData();
        }
    }

    /// <summary>
    /// Parses reply text made of "timestamp:value" lines into traffic samples.
    /// Lines that do not split into exactly two ':'-separated parts, or whose parts do not
    /// parse, are skipped so that stray text in the reply does not discard the valid rows.
    /// </summary>
    /// <param name="content">Reply text; null or empty yields an empty collection.</param>
    /// <returns>The successfully parsed samples, in reply order.</returns>
    private static ObservableCollection<WebsiteTrafficData> ParseCleanedData(string? content)
    {
        var collection = new ObservableCollection<WebsiteTrafficData>();
        if (string.IsNullOrEmpty(content))
        {
            return collection;
        }

        foreach (var line in content.Split('\n'))
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var parts = line.Split(':');
            if (parts.Length != 2)
            {
                continue;
            }

            // Invariant culture: the reply is machine-formatted text, not user-locale text.
            if (!DateTime.TryParseExact(parts[0].Trim(), TimestampFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out var date)
                || !double.TryParse(parts[1].Trim(), NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out var visitors))
            {
                continue;
            }

            collection.Add(new WebsiteTrafficData { DateTime = date, Visitors = visitors });
        }

        return collection;
    }

    /// <summary>
    /// Returns a fixed set of 24 samples (one per hour of 2024-07-01) used when the service call fails.
    /// </summary>
    /// <remarks>
    /// NOTE(review): rows tagged "Missing data" presumably mark hours that are missing in the
    /// raw series and carry a filled-in value here — confirm against the raw data source.
    /// </remarks>
    private static ObservableCollection<WebsiteTrafficData> GetDummyData()
    {
        return new ObservableCollection<WebsiteTrafficData>
        {
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 00, 00, 00), Visitors = 150 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 01, 00, 00), Visitors = 160 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 02, 00, 00), Visitors = 155 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 03, 00, 00), Visitors = 162 }, // Missing data
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 04, 00, 00), Visitors = 170 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 05, 00, 00), Visitors = 175 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 06, 00, 00), Visitors = 145 }, // Missing data
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 07, 00, 00), Visitors = 180 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 08, 00, 00), Visitors = 190 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 09, 00, 00), Visitors = 185 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 10, 00, 00), Visitors = 200 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 11, 00, 00), Visitors = 207 }, // Missing data
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 12, 00, 00), Visitors = 220 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 13, 00, 00), Visitors = 230 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 14, 00, 00), Visitors = 237 }, // Missing data
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 15, 00, 00), Visitors = 250 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 16, 00, 00), Visitors = 260 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 17, 00, 00), Visitors = 270 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 18, 00, 00), Visitors = 277 }, // Missing data
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 19, 00, 00), Visitors = 280 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 20, 00, 00), Visitors = 290 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 21, 00, 00), Visitors = 300 },
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 22, 00, 00), Visitors = 307 }, // Missing data
            new WebsiteTrafficData { DateTime = new DateTime(2024, 07, 01, 23, 00, 00), Visitors = 320 },
        };
    }
}
105+
}

0 commit comments

Comments
 (0)