From 4f4a89a3ca2c6f4d40d0c1944c31a33be31d33b5 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Tue, 18 Feb 2020 23:37:34 +0000 Subject: [PATCH 1/2] Add in script to convert reports ready for powerBI Adding file provided by @geojayuu --- mapactionpy_controller/convert_json_to_csv.py | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 mapactionpy_controller/convert_json_to_csv.py diff --git a/mapactionpy_controller/convert_json_to_csv.py b/mapactionpy_controller/convert_json_to_csv.py new file mode 100644 index 0000000..3f9ce19 --- /dev/null +++ b/mapactionpy_controller/convert_json_to_csv.py @@ -0,0 +1,94 @@ +import os +import sys +import pandas as pd +import json + + +## +# +def parse_directory(sourceDir, extList): + sourceList = [] + for root, dirs, files in os.walk(sourceDir): + for file in files: + for ext in extList: + if file.endswith(ext): + #print(os.path.join(root, file)) + #sourceList.append([root, file]) + sourceList.append(os.path.join(root, file)) + return sourceList + +## +# +def process_json_report(json_source): + # Load JSON into Dictionary Type object + # Note the JSON headers don't follow order of the file + with open(json_source) as json_data: + json_d = json.load(json_data) + + # Various structure definitions - obtain from the Metis codebase + # To ensure final column order + prod_details = [ + 'classification', + 'productName', + 'result', + 'summary', + ] + + correct_order = [ + 'classification', + 'productName', + 'result', + 'added', # Unpacked from results + 'dataSource', # Unpacked from results + 'dateStamp', # Unpacked from results + 'hash', # Unpacked from results + 'layerName', # Unpacked from results + 'message', # Unpacked from results + 'summary', + ] + + # Add to a Pandas Data frame - or process into seprate tables. + df_start = pd.DataFrame.from_dict(json_d) + df_ok = df_start[prod_details] + df_ok = df_ok.drop_duplicates() + # + df_results = pd.DataFrame.from_dict(json_d['results']) + # + df_final = df_results + # Append results values + for col in df_ok.columns.values: + df_final[col] = df_ok[col][0] + # Reorder cols + df_final = df_final[correct_order] + return df_final + + +# +def main(arvg=None): + # Define location of folder with JSON report files. + # To do - set up argument parsing... + root = r'J:\04_MapAction\Projects\Metis\TaskManager' + json_dir = r'json_examples' + json_dir = os.path.join(root, json_dir) + # Get all reports as a list of file paths + reports = parse_directory(json_dir, ['.json']) + # Loop through the folder of JSON reports + # Process each source json file in turn + # Append to a list object for final aggregation + all_product_output = [] + for json_source in reports: + print("Processing {0}...".format(json_source)) + df_processed_json = process_json_report(json_source) + all_product_output.append(df_processed_json) + print('Finished processing.') + # Merge all dfs into a single df + df_all = pd.concat(all_product_output) + # Export to csv + df_all.to_csv('merged_reports.csv', index=False) + print('End.') + + +if __name__ == '__main__': + main() + + From 4e4a8c3a511a9233061b32b06dfcab26a06cf12b Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Wed, 19 Feb 2020 00:01:36 +0000 Subject: [PATCH 2/2] pep8 fixes --- mapactionpy_controller/convert_json_to_csv.py | 54 +++++++++---------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/mapactionpy_controller/convert_json_to_csv.py b/mapactionpy_controller/convert_json_to_csv.py index 3f9ce19..6b7e5fc 100644 --- a/mapactionpy_controller/convert_json_to_csv.py +++ b/mapactionpy_controller/convert_json_to_csv.py @@ -1,51 +1,47 @@ import os -import sys import pandas as pd import json -## -# def parse_directory(sourceDir, extList): sourceList = [] for root, dirs, files in os.walk(sourceDir): for file in files: for ext in extList: if file.endswith(ext): - #print(os.path.join(root, file)) - #sourceList.append([root, file]) + # print(os.path.join(root, file)) + # sourceList.append([root, file]) sourceList.append(os.path.join(root, file)) return sourceList -## -# + def process_json_report(json_source): # Load JSON into Dictionary Type object # Note the JSON headers don't follow order of the file - with open(json_source) as json_data: + with open(json_source) as json_data: json_d = json.load(json_data) # Various structure definitions - obtain from the Metis codebase # To ensure final column order prod_details = [ - 'classification', - 'productName', - 'result', - 'summary', - ] + 'classification', + 'productName', + 'result', + 'summary', + ] correct_order = [ - 'classification', - 'productName', - 'result', - 'added', # Unpacked from results - 'dataSource', # Unpacked from results - 'dateStamp', # Unpacked from results - 'hash', # Unpacked from results - 'layerName', # Unpacked from results - 'message', # Unpacked from results - 'summary', - ] + 'classification', + 'productName', + 'result', + 'added', # Unpacked from results + 'dataSource', # Unpacked from results + 'dateStamp', # Unpacked from results + 'hash', # Unpacked from results + 'layerName', # Unpacked from results + 'message', # Unpacked from results + 'summary', + ] # Add to a Pandas Data frame - or process into seprate tables. df_start = pd.DataFrame.from_dict(json_d) @@ -63,13 +59,13 @@ def process_json_report(json_source): return df_final -# +# def main(arvg=None): # Define location of folder with JSON report files. # To do - set up argument parsing... - root = r'J:\04_MapAction\Projects\Metis\TaskManager' - json_dir = r'json_examples' - json_dir = os.path.join(root, json_dir) + root = r'J:\04_MapAction\Projects\Metis\TaskManager' + json_dir = r'json_examples' + json_dir = os.path.join(root, json_dir) # Get all reports as a list of file paths reports = parse_directory(json_dir, ['.json']) # Loop through the folder of JSON reports @@ -90,5 +86,3 @@ def main(arvg=None): if __name__ == '__main__': main() - -