From 8cc6da76afc32a77d7b7b9a66db8e8ea01bcbc0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ander=20Fern=C3=A1ndez?= Date: Thu, 27 Mar 2025 11:45:13 +0100 Subject: [PATCH 01/13] Main Program --- JsonGenerator.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 JsonGenerator.py diff --git a/JsonGenerator.py b/JsonGenerator.py new file mode 100644 index 0000000..487422b --- /dev/null +++ b/JsonGenerator.py @@ -0,0 +1,62 @@ +#Author: Anderrow +#https://github.com/anderrow + +import pandas as pd +import json +import os + +path = "../ProcessViewMessages.xlsm" +#Read all sheets into a dictionary of DataFrames +dfs = pd.read_excel(path, sheet_name=None) + +del dfs["ProtonView"] #ProtonView sheet is not needed + +#--------------------------------FILTER DATAFRAMES--------------------------------# +df_filtered_dict = {} #Init + +for sheet_name, df in dfs.items(): + #Save the reference for the df in the dict + + #Filter dataframe erasing rows that ends with Undefine (Case sensitive is set to false, Nan values are erased) + df_filtered = df[~df.iloc[:, 2].str.contains(r'.*Undefined$', case=False, na=False)] + #Filter dataframe for 'Uknown message' + df_filtered2 = df_filtered[~df_filtered.iloc[:, 2].str.contains('Unkown message', case=False, na=False)] + + #Select the second column (Keys) and the 3rd columns (Messages) + df_filtered_subset = df_filtered2.iloc[:, [1, 2]] + + #Erase rows if there is a NaN value (Not message found) + df_cleaned = df_filtered_subset.dropna() + + # Keep the daframe filtered + df_filtered_dict[sheet_name] = df_cleaned + + # Print Before and after + print("=" * 50) + print(f"Sheet: {sheet_name}") + print(f"Original length: {len(df)}") + print(f"Filtered length: {len(df_cleaned)}") + print(f"Data has been reduced by a {100 - len(df_cleaned)*100/len(df):.2f}%") + print("=" * 50) + +#--------------------------------END OF FILTER DATAFRAMES--------------------------------# + + +#--------------------------------GENERATE JSON FILES--------------------------------# +# Create the folder in the parent directory if it doesn't exist +output_folder = "../JSON FILES" +os.makedirs(output_folder, exist_ok=True) + +# Loop to save the JSON files +for sheet_name, df in df_filtered_dict.items(): + # Generate the name of the file based on the name of the excel sheet + json_filename = os.path.join(output_folder, f"{sheet_name}.json") + + # Convert the df to a dict and save it as a JSON + df_to_json = df.to_dict(orient='records') + + with open(json_filename, 'w') as json_file: + json.dump(df_to_json, json_file) # Removed indent parameter for no line breaks + + print(f"JSON file {json_filename} has been created.") +#--------------------------------END OFGENERATE JSON FILESS--------------------------------# \ No newline at end of file From d8784dc88f115ae43648095fd908d6e027284fdb Mon Sep 17 00:00:00 2001 From: anderrow Date: Thu, 27 Mar 2025 12:12:12 +0100 Subject: [PATCH 02/13] Bulletproof and delete Pointer Overview --- JsonGenerator.py | 51 +++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index 487422b..859a3a5 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -1,48 +1,55 @@ -#Author: Anderrow -#https://github.com/anderrow +# Author: Anderrow +# https://github.com/anderrow import pandas as pd import json import os path = "../ProcessViewMessages.xlsm" -#Read all sheets into a dictionary of DataFrames -dfs = pd.read_excel(path, sheet_name=None) +# Read all sheets into a dictionary of DataFrames +dfs = pd.read_excel(path, sheet_name=None) -del dfs["ProtonView"] #ProtonView sheet is not needed +del dfs["ProtonView"] # ProtonView sheet is not needed +del dfs["Pointer overview"] # Pointer Overview is not needed -#--------------------------------FILTER DATAFRAMES--------------------------------# -df_filtered_dict = {} #Init +# --------------------------------FILTER DATAFRAMES--------------------------------# +df_filtered_dict = {} # Init for sheet_name, df in dfs.items(): - #Save the reference for the df in the dict + # Save the reference for the df in the dict - #Filter dataframe erasing rows that ends with Undefine (Case sensitive is set to false, Nan values are erased) + # Replace NaN with an empty string before converting the third column to string + df.iloc[:, 2] = df.iloc[:, 2].fillna('') + + # Convert the third column to string to avoid issues with non-string values + df.iloc[:, 2] = df.iloc[:, 2].astype(str) + + # Filter dataframe erasing rows that ends with Undefine (Case sensitive is set to false, Nan values are erased) df_filtered = df[~df.iloc[:, 2].str.contains(r'.*Undefined$', case=False, na=False)] - #Filter dataframe for 'Uknown message' + + # Filter dataframe for 'Unknown message' df_filtered2 = df_filtered[~df_filtered.iloc[:, 2].str.contains('Unkown message', case=False, na=False)] - #Select the second column (Keys) and the 3rd columns (Messages) + # Select the second column (Keys) and the third column (Messages) df_filtered_subset = df_filtered2.iloc[:, [1, 2]] - #Erase rows if there is a NaN value (Not message found) + # Erase rows if there is a NaN value (Not message found) df_cleaned = df_filtered_subset.dropna() - - # Keep the daframe filtered + + # Keep the dataframe filtered df_filtered_dict[sheet_name] = df_cleaned - + # Print Before and after print("=" * 50) print(f"Sheet: {sheet_name}") print(f"Original length: {len(df)}") print(f"Filtered length: {len(df_cleaned)}") - print(f"Data has been reduced by a {100 - len(df_cleaned)*100/len(df):.2f}%") + print(f"Data has been reduced by a {100 - len(df_cleaned) * 100 / len(df):.2f}%") print("=" * 50) -#--------------------------------END OF FILTER DATAFRAMES--------------------------------# - +# --------------------------------END OF FILTER DATAFRAMES--------------------------------# -#--------------------------------GENERATE JSON FILES--------------------------------# +# --------------------------------GENERATE JSON FILES--------------------------------# # Create the folder in the parent directory if it doesn't exist output_folder = "../JSON FILES" os.makedirs(output_folder, exist_ok=True) @@ -51,12 +58,12 @@ for sheet_name, df in df_filtered_dict.items(): # Generate the name of the file based on the name of the excel sheet json_filename = os.path.join(output_folder, f"{sheet_name}.json") - + # Convert the df to a dict and save it as a JSON df_to_json = df.to_dict(orient='records') - + with open(json_filename, 'w') as json_file: json.dump(df_to_json, json_file) # Removed indent parameter for no line breaks print(f"JSON file {json_filename} has been created.") -#--------------------------------END OFGENERATE JSON FILESS--------------------------------# \ No newline at end of file +# --------------------------------END OF GENERATE JSON FILES--------------------------------# From 722e7572f563513e4652125081e539106a95c6eb Mon Sep 17 00:00:00 2001 From: anderrow Date: Thu, 27 Mar 2025 13:35:28 +0100 Subject: [PATCH 03/13] Correction in comment. Line 27, Values are skipped, not erased. Nan values are erased on line 37. --- JsonGenerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index 859a3a5..9f4b371 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -24,7 +24,7 @@ # Convert the third column to string to avoid issues with non-string values df.iloc[:, 2] = df.iloc[:, 2].astype(str) - # Filter dataframe erasing rows that ends with Undefine (Case sensitive is set to false, Nan values are erased) + # Filter dataframe erasing rows that ends with Undefine (Case sensitive is set to false, Nan values are skipped) df_filtered = df[~df.iloc[:, 2].str.contains(r'.*Undefined$', case=False, na=False)] # Filter dataframe for 'Unknown message' From 19b9f91478353e6ee30329294014e1157a571bcf Mon Sep 17 00:00:00 2001 From: anderrow Date: Thu, 27 Mar 2025 13:36:39 +0100 Subject: [PATCH 04/13] Read README.me comment added --- JsonGenerator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index 9f4b371..56356f1 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -1,4 +1,5 @@ # Author: Anderrow +# Read the README.md file for more info. # https://github.com/anderrow import pandas as pd @@ -16,8 +17,6 @@ df_filtered_dict = {} # Init for sheet_name, df in dfs.items(): - # Save the reference for the df in the dict - # Replace NaN with an empty string before converting the third column to string df.iloc[:, 2] = df.iloc[:, 2].fillna('') From 900d548673406f1b1341b923cd20e11cbc129550 Mon Sep 17 00:00:00 2001 From: anderrow Date: Thu, 27 Mar 2025 15:23:21 +0100 Subject: [PATCH 05/13] Correct the formart of the json --- JsonGenerator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index 56356f1..31b2bd8 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -58,11 +58,11 @@ # Generate the name of the file based on the name of the excel sheet json_filename = os.path.join(output_folder, f"{sheet_name}.json") - # Convert the df to a dict and save it as a JSON - df_to_json = df.to_dict(orient='records') + # Convert the DataFrame to a dictionary with Column1 as the key and Column2 as the value + df_to_json = dict(zip(df.iloc[:, 0], df.iloc[:, 1])) with open(json_filename, 'w') as json_file: - json.dump(df_to_json, json_file) # Removed indent parameter for no line breaks + json.dump(df_to_json, json_file) # Saving without indent for a compact format print(f"JSON file {json_filename} has been created.") # --------------------------------END OF GENERATE JSON FILES--------------------------------# From fccad3067a568d5bed50b72983bb6257a7743189 Mon Sep 17 00:00:00 2001 From: anderrow Date: Thu, 27 Mar 2025 15:24:55 +0100 Subject: [PATCH 06/13] Erase also empty strings, not only NaN values --- JsonGenerator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/JsonGenerator.py b/JsonGenerator.py index 31b2bd8..1170ea4 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -34,6 +34,7 @@ # Erase rows if there is a NaN value (Not message found) df_cleaned = df_filtered_subset.dropna() + df_cleaned = df_filtered_subset[df_filtered_subset.iloc[:, 1] != ""] # Keep the dataframe filtered df_filtered_dict[sheet_name] = df_cleaned From a63b0e506d30372e714b39164ee8a34024c1cff1 Mon Sep 17 00:00:00 2001 From: anderrow Date: Thu, 27 Mar 2025 15:54:58 +0100 Subject: [PATCH 07/13] JSON OOP Alternative generating for practice OOP --- AlternativeOOP.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 AlternativeOOP.py diff --git a/AlternativeOOP.py b/AlternativeOOP.py new file mode 100644 index 0000000..c1bb258 --- /dev/null +++ b/AlternativeOOP.py @@ -0,0 +1,78 @@ +import pandas as pd +import json +import os + +class ExcelProcessor: + def __init__(self, path, output_folder): + # Initialize the class with the Excel file path and output folder + self.path = path + self.output_folder = output_folder + + def process_excel(self): + # Read all sheets from the Excel file + dfs = pd.read_excel(self.path, sheet_name=None) + + # Remove unnecessary sheets + del dfs["ProtonView"] # ProtonView sheet is not needed + del dfs["Pointer overview"] # Pointer Overview is not needed + + df_filtered_dict = {} # Dictionary to store filtered DataFrames + + # Process each sheet + for sheet_name, df in dfs.items(): + # Clean and prepare the data + df.iloc[:, 2] = df.iloc[:, 2].fillna('') + df.iloc[:, 2] = df.iloc[:, 2].astype(str) + df_filtered = df[~df.iloc[:, 2].str.contains(r'.*Undefined$', case=False, na=False)] + df_filtered2 = df_filtered[~df_filtered.iloc[:, 2].str.contains('Unkown message', case=False, na=False)] + df_filtered_subset = df_filtered2.iloc[:, [1, 2]] + df_cleaned = df_filtered_subset.dropna() + df_cleaned = df_filtered_subset[df_filtered_subset.iloc[:, 1] != ""] + + # Store the filtered DataFrame in the dictionary + df_filtered_dict[sheet_name] = df_cleaned + + # Print stats for each sheet + self.print_stats(sheet_name, len(df), len(df_cleaned)) + + return df_filtered_dict + + def print_stats(self, sheet_name, original_len, filtered_len): + # Print statistics about the processing of each sheet + print("=" * 50) + print(f"Sheet: {sheet_name}") + print(f"Original length: {original_len}") + print(f"Filtered length: {filtered_len}") + print(f"Data has been reduced by a {100 - filtered_len * 100 / original_len:.2f}%") + print("=" * 50) + + def generate_json_files(self, df_filtered_dict): + # Create the output folder if it doesn't exist + os.makedirs(self.output_folder, exist_ok=True) + + # Save the filtered DataFrames as JSON files + for sheet_name, df in df_filtered_dict.items(): + json_filename = os.path.join(self.output_folder, f"{sheet_name}.json") + df_to_json = dict(zip(df.iloc[:, 0], df.iloc[:, 1])) + + # Save the JSON file + with open(json_filename, 'w') as json_file: + json.dump(df_to_json, json_file) + + print(f"JSON file {json_filename} has been created.") + +# =================================== +# Example usage of the ExcelProcessor class + +# Path to the Excel file and output folder for JSON files +path = "../ProcessViewMessages.xlsm" +output_folder = "../JSON FILES" + +# Create an instance of the ExcelProcessor class +processor = ExcelProcessor(path, output_folder) + +# Process the Excel file +df_filtered_dict = processor.process_excel() + +# Generate JSON files from the filtered DataFrames +processor.generate_json_files(df_filtered_dict) From 2968aba51dc81ec02034046c75d756e3d3081c2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ander=20Fern=C3=A1ndez?= Date: Fri, 28 Mar 2025 09:19:32 +0100 Subject: [PATCH 08/13] Update README.md Correction --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e25e438..340ce1a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # JSON_Generator -This is a JSON generator programmed in Python. It automatically generates a Python file based on the information provided in the 'ProcessViewMessages.xlsm' Excel file. +This is a JSON generator programmed in Python. It automatically generates a JSON file based on the information provided in the 'ProcessViewMessages.xlsm' Excel file. --- It's much better to run this program again on the Python engine; anyway: If .exe is needed: From 05efd1146866fd62671bcdcc3a4601f45c11677a Mon Sep 17 00:00:00 2001 From: anderrow Date: Wed, 2 Apr 2025 16:49:48 +0200 Subject: [PATCH 09/13] Update: All the languages for all the enviroments --- JsonGenerator.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index 1170ea4..9a544f7 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -5,15 +5,16 @@ import pandas as pd import json import os +import re path = "../ProcessViewMessages.xlsm" # Read all sheets into a dictionary of DataFrames dfs = pd.read_excel(path, sheet_name=None) +# --------------------------------FILTER DATAFRAMES--------------------------------# del dfs["ProtonView"] # ProtonView sheet is not needed del dfs["Pointer overview"] # Pointer Overview is not needed -# --------------------------------FILTER DATAFRAMES--------------------------------# df_filtered_dict = {} # Init for sheet_name, df in dfs.items(): @@ -30,10 +31,9 @@ df_filtered2 = df_filtered[~df_filtered.iloc[:, 2].str.contains('Unkown message', case=False, na=False)] # Select the second column (Keys) and the third column (Messages) - df_filtered_subset = df_filtered2.iloc[:, [1, 2]] + df_filtered_subset = df_filtered2.iloc[:, 1:9] # Erase rows if there is a NaN value (Not message found) - df_cleaned = df_filtered_subset.dropna() df_cleaned = df_filtered_subset[df_filtered_subset.iloc[:, 1] != ""] # Keep the dataframe filtered @@ -50,20 +50,27 @@ # --------------------------------END OF FILTER DATAFRAMES--------------------------------# # --------------------------------GENERATE JSON FILES--------------------------------# -# Create the folder in the parent directory if it doesn't exist -output_folder = "../JSON FILES" -os.makedirs(output_folder, exist_ok=True) # Loop to save the JSON files for sheet_name, df in df_filtered_dict.items(): - # Generate the name of the file based on the name of the excel sheet - json_filename = os.path.join(output_folder, f"{sheet_name}.json") + # Create the folder in the parent directory if it doesn't exist + output_folder = f"../JSON FILES/{sheet_name}" + os.makedirs(output_folder, exist_ok=True) + print("*"*50) + #Loop to save each column of the dataframe + for column in range(1, df.shape[1]): + column_name = df.columns[column] #Keep name of the column with index column + + column_name_simple = re.sub(r"^([a-zA-Z]+)-.*", r"\1", column_name) + + # Generate the name of the file based on the name of the excel sheet + json_filename = os.path.join(output_folder, f"{column_name_simple}.json") - # Convert the DataFrame to a dictionary with Column1 as the key and Column2 as the value - df_to_json = dict(zip(df.iloc[:, 0], df.iloc[:, 1])) + # Convert the DataFrame to a dictionary with Column1 as the key and Column2 as the value + df_to_json = dict(zip(df.iloc[:, 0], df.iloc[:, column])) - with open(json_filename, 'w') as json_file: - json.dump(df_to_json, json_file) # Saving without indent for a compact format + with open(json_filename, 'w') as json_file: + json.dump(df_to_json, json_file) # Saving without indent for a compact format - print(f"JSON file {json_filename} has been created.") + print(f"JSON file {json_filename} has been created") # --------------------------------END OF GENERATE JSON FILES--------------------------------# From 9477048a2190ebca7934386485b5e6a4833e1757 Mon Sep 17 00:00:00 2001 From: anderrow Date: Wed, 2 Apr 2025 17:06:06 +0200 Subject: [PATCH 10/13] Delete Alternative OOP --- AlternativeOOP.py | 78 ----------------------------------------------- 1 file changed, 78 deletions(-) delete mode 100644 AlternativeOOP.py diff --git a/AlternativeOOP.py b/AlternativeOOP.py deleted file mode 100644 index c1bb258..0000000 --- a/AlternativeOOP.py +++ /dev/null @@ -1,78 +0,0 @@ -import pandas as pd -import json -import os - -class ExcelProcessor: - def __init__(self, path, output_folder): - # Initialize the class with the Excel file path and output folder - self.path = path - self.output_folder = output_folder - - def process_excel(self): - # Read all sheets from the Excel file - dfs = pd.read_excel(self.path, sheet_name=None) - - # Remove unnecessary sheets - del dfs["ProtonView"] # ProtonView sheet is not needed - del dfs["Pointer overview"] # Pointer Overview is not needed - - df_filtered_dict = {} # Dictionary to store filtered DataFrames - - # Process each sheet - for sheet_name, df in dfs.items(): - # Clean and prepare the data - df.iloc[:, 2] = df.iloc[:, 2].fillna('') - df.iloc[:, 2] = df.iloc[:, 2].astype(str) - df_filtered = df[~df.iloc[:, 2].str.contains(r'.*Undefined$', case=False, na=False)] - df_filtered2 = df_filtered[~df_filtered.iloc[:, 2].str.contains('Unkown message', case=False, na=False)] - df_filtered_subset = df_filtered2.iloc[:, [1, 2]] - df_cleaned = df_filtered_subset.dropna() - df_cleaned = df_filtered_subset[df_filtered_subset.iloc[:, 1] != ""] - - # Store the filtered DataFrame in the dictionary - df_filtered_dict[sheet_name] = df_cleaned - - # Print stats for each sheet - self.print_stats(sheet_name, len(df), len(df_cleaned)) - - return df_filtered_dict - - def print_stats(self, sheet_name, original_len, filtered_len): - # Print statistics about the processing of each sheet - print("=" * 50) - print(f"Sheet: {sheet_name}") - print(f"Original length: {original_len}") - print(f"Filtered length: {filtered_len}") - print(f"Data has been reduced by a {100 - filtered_len * 100 / original_len:.2f}%") - print("=" * 50) - - def generate_json_files(self, df_filtered_dict): - # Create the output folder if it doesn't exist - os.makedirs(self.output_folder, exist_ok=True) - - # Save the filtered DataFrames as JSON files - for sheet_name, df in df_filtered_dict.items(): - json_filename = os.path.join(self.output_folder, f"{sheet_name}.json") - df_to_json = dict(zip(df.iloc[:, 0], df.iloc[:, 1])) - - # Save the JSON file - with open(json_filename, 'w') as json_file: - json.dump(df_to_json, json_file) - - print(f"JSON file {json_filename} has been created.") - -# =================================== -# Example usage of the ExcelProcessor class - -# Path to the Excel file and output folder for JSON files -path = "../ProcessViewMessages.xlsm" -output_folder = "../JSON FILES" - -# Create an instance of the ExcelProcessor class -processor = ExcelProcessor(path, output_folder) - -# Process the Excel file -df_filtered_dict = processor.process_excel() - -# Generate JSON files from the filtered DataFrames -processor.generate_json_files(df_filtered_dict) From 63f001c28643915b92375c8c2f9602c1d5f818fc Mon Sep 17 00:00:00 2001 From: anderrow Date: Wed, 2 Apr 2025 17:18:40 +0200 Subject: [PATCH 11/13] Update: Throw an error if column_name doesn't fit the patron --- JsonGenerator.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index 9a544f7..6580033 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -51,6 +51,9 @@ # --------------------------------GENERATE JSON FILES--------------------------------# +#Define the column name pattern need it with regex for avoid extrange file names. +column_name_pattern = r'^[a-z]{2}-[A-Z]{2}$' + # Loop to save the JSON files for sheet_name, df in df_filtered_dict.items(): # Create the folder in the parent directory if it doesn't exist @@ -58,19 +61,23 @@ os.makedirs(output_folder, exist_ok=True) print("*"*50) #Loop to save each column of the dataframe - for column in range(1, df.shape[1]): - column_name = df.columns[column] #Keep name of the column with index column + for column in range(1, df.shape[1]): + column_name = df.columns[column] #Keep name of the column with index column - column_name_simple = re.sub(r"^([a-zA-Z]+)-.*", r"\1", column_name) + #verify that the column complies with the defined regex pattern + if not re.match(column_name_pattern, column_name): + raise ValueError(f"The column name '{column_name}' doesn't have the requiered format [a-z][a-z]-[A-Z][A-Z]") + + column_name_simple = re.sub(r"^([a-zA-Z]+)-.*", r"\1", column_name) - # Generate the name of the file based on the name of the excel sheet - json_filename = os.path.join(output_folder, f"{column_name_simple}.json") + # Generate the name of the file based on the name of the excel sheet + json_filename = os.path.join(output_folder, f"{column_name_simple}.json") - # Convert the DataFrame to a dictionary with Column1 as the key and Column2 as the value - df_to_json = dict(zip(df.iloc[:, 0], df.iloc[:, column])) + # Convert the DataFrame to a dictionary with Column1 as the key and Column2 as the value + df_to_json = dict(zip(df.iloc[:, 0], df.iloc[:, column])) - with open(json_filename, 'w') as json_file: - json.dump(df_to_json, json_file) # Saving without indent for a compact format + with open(json_filename, 'w') as json_file: + json.dump(df_to_json, json_file) # Saving without indent for a compact format - print(f"JSON file {json_filename} has been created") + print(f"JSON file {json_filename} has been created") # --------------------------------END OF GENERATE JSON FILES--------------------------------# From c0434c3366d89623d7e940806f6d4b4a725893f3 Mon Sep 17 00:00:00 2001 From: anderrow Date: Wed, 2 Apr 2025 17:27:05 +0200 Subject: [PATCH 12/13] Verify if the columns are correct before generates the files --- JsonGenerator.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index 6580033..c475bdd 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -56,17 +56,18 @@ # Loop to save the JSON files for sheet_name, df in df_filtered_dict.items(): - # Create the folder in the parent directory if it doesn't exist - output_folder = f"../JSON FILES/{sheet_name}" - os.makedirs(output_folder, exist_ok=True) - print("*"*50) - #Loop to save each column of the dataframe + # Create the folder in the parent directory if it doesn't exist + output_folder = f"../JSON FILES/{sheet_name}" + os.makedirs(output_folder, exist_ok=True) + print("*"*50) + + #Loop to save each column of the dataframe for column in range(1, df.shape[1]): column_name = df.columns[column] #Keep name of the column with index column #verify that the column complies with the defined regex pattern if not re.match(column_name_pattern, column_name): - raise ValueError(f"The column name '{column_name}' doesn't have the requiered format [a-z][a-z]-[A-Z][A-Z]") + raise ValueError(f"\n The column name '{column_name}' doesn't have the requiered format [a-z][a-z]-[A-Z][A-Z]") column_name_simple = re.sub(r"^([a-zA-Z]+)-.*", r"\1", column_name) From e4460285fa7e9c7a3fd4a798cb8a83326cc678ce Mon Sep 17 00:00:00 2001 From: anderrow Date: Tue, 8 Apr 2025 09:01:07 +0200 Subject: [PATCH 13/13] index on main: c0434c3 Verify if the columns are correct before generates the files --- JsonGenerator.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/JsonGenerator.py b/JsonGenerator.py index c475bdd..3d2228b 100644 --- a/JsonGenerator.py +++ b/JsonGenerator.py @@ -54,6 +54,15 @@ #Define the column name pattern need it with regex for avoid extrange file names. column_name_pattern = r'^[a-z]{2}-[A-Z]{2}$' +#Verify that the columns have a correct name +for sheet_name, df in df_filtered_dict.items(): + for column in range(1, df.shape[1]): + column_name = df.columns[column] #Keep name of the column with index column + + #verify that the column complies with the defined regex pattern + if not re.match(column_name_pattern, column_name): + raise ValueError(f"\n The column name '{column_name}' doesn't have the requiered format [a-z][a-z]-[A-Z][A-Z]") + # Loop to save the JSON files for sheet_name, df in df_filtered_dict.items(): # Create the folder in the parent directory if it doesn't exist @@ -64,10 +73,6 @@ #Loop to save each column of the dataframe for column in range(1, df.shape[1]): column_name = df.columns[column] #Keep name of the column with index column - - #verify that the column complies with the defined regex pattern - if not re.match(column_name_pattern, column_name): - raise ValueError(f"\n The column name '{column_name}' doesn't have the requiered format [a-z][a-z]-[A-Z][A-Z]") column_name_simple = re.sub(r"^([a-zA-Z]+)-.*", r"\1", column_name)