-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRandomDatasetGenNPreprocessor.py
More file actions
132 lines (116 loc) · 6.09 KB
/
RandomDatasetGenNPreprocessor.py
File metadata and controls
132 lines (116 loc) · 6.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#Importing NumPy for dataset generation and preprocessing
import numpy as np
#Importing Matplotlib for plotting the datasets
import matplotlib.pyplot as plt
#Importing random to generate random seed
import random
#Importing math to calculate functions
import math
#Seed both random number generators used in this file for reproducibility
def set_random_seed(seed):
    """Seed Python's ``random`` module and NumPy's global generator.

    Args:
        seed: Value handed unchanged to ``random.seed`` and ``np.random.seed``.
    """
    #Python's generator is seeded first, then NumPy's, both with the same value
    for seed_generator in (random.seed, np.random.seed):
        seed_generator(seed)
#Function to generate a synthetic dataset
def generate_dataset(N, x_min, x_max):
    """Generate a random dataset ``Y = A*f1(B*X) + C*f2(D*X) + E*f3(F*X)``.

    X is drawn with ``np.random.uniform``; the constants A..F and the
    functions f1..f3 are drawn with Python's ``random`` module, so both
    generators must be seeded for reproducible output.

    Args:
        N: Number of X samples to draw.
        x_min: Lower bound passed to ``np.random.uniform``.
        x_max: Upper bound passed to ``np.random.uniform``.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays, where Y is computed element-wise
        from X.
    """
    #Generate N random X values uniformly distributed within x_min and x_max
    X = np.random.uniform(x_min, x_max, N)

    #A, C and E lie between -10 and 10; B, D and F lie between -5 and 5.
    #The draw order A..F is fixed so a given seed always yields the same constants.
    A = random.uniform(-10, 10)
    B = random.uniform(-5, 5)
    C = random.uniform(-10, 10)
    D = random.uniform(-5, 5)
    E = random.uniform(-10, 10)
    F = random.uniform(-5, 5)

    #Scaled inputs that f1, f2 and f3 will receive (computed once and reused)
    scaled_inputs = (B * X, D * X, F * X)

    #Candidate functions that are defined for every real input
    candidates = [np.sin, np.cos, np.tan, lambda x: x**2, lambda x: x**3]
    #np.log is offered only when every scaled input is strictly positive,
    #because any of f1, f2, f3 may end up being log. It is inserted at index 3
    #so the candidate order matches [sin, cos, tan, log, x**2, x**3].
    if not np.any(np.array(scaled_inputs) <= 0):
        candidates.insert(3, np.log)

    f1 = random.choice(candidates) #Randomly choose f1
    f2 = random.choice(candidates) #Randomly choose f2
    f3 = random.choice(candidates) #Randomly choose f3

    in1, in2, in3 = scaled_inputs
    Y = A * f1(in1) + C * f2(in2) + E * f3(in3) #Calculating the values of Y
    return X, Y #Returning the generated dataset X and Y
#Function to draw a scatter plot of Y against X with matplotlib
def plot_scatter(X, Y):
    """Show a scatter plot of Y against X in a new 8x6 figure.

    Args:
        X: Values plotted on the horizontal axis.
        Y: Values plotted on the vertical axis.
    """
    _, axes = plt.subplots(figsize=(8, 6)) #New 8x6 figure with a single Axes
    axes.scatter(X, Y, color='blue', label='Data points') #Blue points, labelled for the legend
    axes.set_title("Scatter Plot: X vs Y")
    axes.set_xlabel("X")
    axes.set_ylabel("Y")
    axes.legend() #Legend picks up the 'Data points' label
    axes.grid(True)
    plt.show() #Display the plot
#Function to plot a histogram of X using matplotlib
def plot_histogram(X, N):
    """Show a histogram of X in a new 8x6 figure.

    Args:
        X: Values to bin.
        N: Sample count used by the square-root rule to pick the bin count.
    """
    #Square-root rule for the number of bins, clamped to at least one bin
    #because int(sqrt(N)) is 0 for N below 1 and hist needs a positive count
    bins = max(1, int(np.sqrt(N)))
    plt.figure(figsize=(8, 6)) #keeping the figure size as 8x6
    plt.hist(X, bins=bins, color='green', edgecolor='black', label='X Distribution') #Green bars with black edges, labelled for the legend
    plt.title("Histogram of X") #Title as Histogram of X
    plt.xlabel("X") #Giving xlabel
    plt.ylabel("Frequency") #Giving ylabel
    plt.legend() #Applying legend for the label X Distribution
    plt.grid(True) #Grid display
    plt.show() #Displaying the plot
#Function to draw a horizontal box plot of Y with matplotlib
def plot_box(Y):
    """Show a horizontal box plot of Y in a new 8x6 figure.

    Args:
        Y: Values summarised by the box plot.
    """
    _, axes = plt.subplots(figsize=(8, 6)) #New 8x6 figure with a single Axes
    box_style = dict(facecolor="lightblue", color="black")
    median_style = dict(color="red", linewidth=2)
    #No 'label' argument is passed here; the legend entry is built by hand below
    artists = axes.boxplot(
        Y,
        vert=False,
        patch_artist=True,
        boxprops=box_style,
        medianprops=median_style,
    )
    axes.set_title("Box Plot of Y")
    axes.set_xlabel("Y")
    #Use the first (only) box patch as the legend handle
    first_box = artists["boxes"][0]
    axes.legend([first_box], ['Y Values'], loc='upper right')
    axes.grid(True)
    plt.show() #Display the plot
#Function to draw a line plot of Y against X after sorting the points by X
def plot_line(X, Y):
    """Show a line plot of Y against X, with points ordered by ascending X.

    Args:
        X: NumPy array of horizontal-axis values; determines the ordering.
        Y: NumPy array of vertical-axis values, reordered with the same indices.
    """
    order = np.argsort(X) #Indices that sort X in ascending order
    _, axes = plt.subplots(figsize=(8, 6)) #New 8x6 figure with a single Axes
    #Y is indexed with the same order so each point keeps its original pairing
    axes.plot(X[order], Y[order], color='red', label='Sorted X vs Y')
    axes.set_title("Line Plot of Sorted X vs Y")
    axes.set_xlabel("Sorted X")
    axes.set_ylabel("Y")
    axes.legend() #Legend picks up the 'Sorted X vs Y' label
    axes.grid(True)
    plt.show() #Display the plot
def main():
    """Prompt for N, xmin and xmax, then generate a dataset and show four plots.

    The prompts repeat from the start whenever a value is rejected: N must be
    a positive integer, xmin and xmax must be integers, and xmax must not be
    smaller than xmin.
    """
    set_random_seed(29) #Fixed seed so every run generates the same dataset
    while True:
        try: #Trying to take int input
            N = int(input("Enter N : ")) #Taking input as N
        except ValueError: #If typecast unsuccessful
            print("Please enter a Valid numerical N") #Error message display
            continue
        if N <= 0: #If N is zero or negative
            print("Please enter a positive value of N") #Error message display
            continue
        try:
            xmin = int(input("Enter xmin : ")) #Taking xmin input
        except ValueError: #If typecast unsuccessful
            print("Please enter valid numerical xmin") #Error message display
            continue
        try:
            xmax = int(input("Enter xmax : ")) #Taking xmax input
        except ValueError: #If typecast unsuccessful
            print("Please enter valid numerical xmax") #Error message display
            continue
        #np.random.uniform is officially undefined when high < low, so an
        #inverted range is rejected here instead of being passed through
        if xmax < xmin:
            print("Please enter xmax greater than or equal to xmin") #Error message display
            continue
        break #Break if everything correct
    X, Y = generate_dataset(N, xmin, xmax) #Generating random data based on N, xmin and xmax
    plot_scatter(X, Y) #Plotting scatter plot
    plot_histogram(X, N) #Plotting histogram plot
    plot_box(Y) #Plotting box plot
    plot_line(X, Y) #Plotting line plot
#Call main only when this file is executed directly as a script,
#not when it is imported as a module by another file
if __name__ == "__main__":
    main()