From cb97e2e45a73007628a853ba8293e44f1e768158 Mon Sep 17 00:00:00 2001 From: optimizing-ci-builds Date: Sun, 6 Aug 2023 22:20:30 -0400 Subject: [PATCH] Changed yaml files --- .github/workflows/ci.yml | 59 ++++++++ .github/workflows/inotify_script.py | 22 +++ .github/workflows/script.py | 223 ++++++++++++++++++++++++++++ 3 files changed, 304 insertions(+) create mode 100644 .github/workflows/inotify_script.py create mode 100644 .github/workflows/script.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7868ca..d9ce1f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,11 +7,70 @@ jobs: runs-on: ubuntu-latest steps: + - uses: actions/setup-python@v2 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pandas + pip install numpy + pip install inotify + - run: sudo apt update + - uses: actions/checkout@v3 + - run: python .github/workflows/inotify_script.py /home/runner/work/GradleTestCI/GradleTestCI /home/runner/inotify-logs.csv & echo 'optimizing-ci-builds' + - run: touch starting_build_uses-checkout_10 + - run: rm starting_build_uses-checkout_10 - uses: actions/checkout@v3 + - run: touch starting_build_SetupJDK8_11 + - run: rm starting_build_SetupJDK8_11 - name: Set up JDK 8 uses: actions/setup-java@v3 with: java-version: '8' distribution: 'adopt' + - run: touch starting_build_Buildmodules_16 + - run: rm starting_build_Buildmodules_16 - name: Build modules run: gradle clean build + - run: touch starting_finished_finished_8979874 + if: always() + - run: rm starting_finished_finished_8979874 + if: always() + - name: rat check + if: always() + run: | + if [ -f /home/runner/work/GradleTestCI/GradleTestCI/target/rat.txt ]; then cat /home/runner/work/GradleTestCI/GradleTestCI/target/rat.txt; fi + - name: Check script file exists and execute + if: always() + run: | + [ -f .github/workflows/script.py ] && python .github/workflows/script.py + [ -f /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv ] || mkdir -p /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/; echo "${GITHUB_RUN_ID},${GITHUB_JOB},GradleTestCI,${GITHUB_WORKFLOW}" > /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv + - name: Checkout to destination CI-analyzes repo + uses: actions/checkout@v3 + if: always() + with: + path: GradleTestCI + ref: '1691374686-548a464' + repository: 'UT-SE-Research/ci-analyzes' + token: '${{ secrets.API_TOKEN_GITHUB }}' + persist-credentials: true + - name: Copy files to push to another directory + if: always() + run: | + mkdir -p GradleTestCI/GradleTestCI/.github/workflows/ci/build + cp -rvT optimizing-ci-builds-ci-analysis GradleTestCI/GradleTestCI/.github/workflows/ci/build + - run: echo https://github.com/UT-SE-Research/ci-analyzes/tree/1691374686-548a464/GradleTestCI/.github/workflows/ci/build + - name: Pushes analysis to another repository + if: always() + working-directory: GradleTestCI + run: | + commit_message=$GITHUB_REPOSITORY@$GITHUB_WORKFLOW_SHA + git config --global user.name 'UT-SE-Research' + git config --global user.email '${{ secrets.EMAIL }}' + git add . + git commit -m $commit_message + while ! git push origin 1691374686-548a464; do + git pull --rebase origin 1691374686-548a464 + sleep $((RANDOM % 5 + 1)) + done diff --git a/.github/workflows/inotify_script.py b/.github/workflows/inotify_script.py new file mode 100644 index 0000000..4a4662f --- /dev/null +++ b/.github/workflows/inotify_script.py @@ -0,0 +1,22 @@ +import inotify.adapters +from datetime import datetime +import sys + +target_dir = sys.argv[1] +log_file = sys.argv[2] + +with open(log_file, "w") as f: + pass + +def _main(): + i = inotify.adapters.InotifyTree(target_dir) + + for event in i.event_gen(yield_nones=False): + (_, type_names, path, filename) = event + timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ') + with open(log_file, "a") as f: + f.write(f"{timestamp};{path};{filename};{','.join(type_names)}\n") + f.flush() + +if __name__ == '__main__': + _main() \ No newline at end of file diff --git a/.github/workflows/script.py b/.github/workflows/script.py new file mode 100644 index 0000000..0c7ab15 --- /dev/null +++ b/.github/workflows/script.py @@ -0,0 +1,223 @@ +import pandas as pd +import numpy as np +import os +import shutil + +def show_directories(file_path): + with open(file_path, 'r') as f: + df = pd.read_csv(file_path, sep=',') + paths = df["file_name"].to_list() + root = TreeNode("", None) + + for path in paths: + find_and_insert(root, path.split("/")[1:]) + + stack = [] + root.print(True, stack) + return stack + + +class TreeNode: + def __init__(self, name, parent): + self.parent = parent + self.name = name + self.number_of_children = 0 + self.children = [] + + def add_child(self, node): + self.children.append(node) + self.number_of_children+=1 + return node + + def print(self, is_root, stack): + pre_0 = " " + pre_1 = "\u2502 " + pre_2 = "\u251c\u2500\u2500 " + pre_3 = "\u2514\u2500\u2500 " + + tree = self + prefix = pre_2 if tree.parent and id(tree) != id(tree.parent.children[-1]) else pre_3 + + while tree.parent and tree.parent.parent: + if tree.parent.parent and id(tree.parent) != id(tree.parent.parent.children[-1]): + prefix = pre_1 + prefix + else: + prefix = pre_0 + prefix + + tree = tree.parent + + if is_root: + stack.append(self.name) + else: + stack.append(f"{prefix} {self.name} {str(self.number_of_children)}") + + for child in self.children: + child.print(False, stack) + + +def find_and_insert(parent, edges): + # Terminate if there is no edge + if not edges: + return + + # Find a child with the name edges[0] in the current node + match = [tree for tree in parent.children if tree.name == edges[0]] + + # If there is already a node with the name edges[0] in the children, set "pointer" tree to this node. If there is no such node, add a node in the current tree node then set "pointer" tree to it + tree = match[0] if match else parent.add_child(TreeNode(edges[0], parent)) + + # Recursively process the following edges[1:] + find_and_insert(tree, edges[1:]) + + +df = pd.read_csv('/home/runner/inotify-logs.csv', sep = ';', names=['time', 'watched_filename', 'event_filename', 'event_name']) +df['event_filename'] = df['event_filename'].replace(np.nan, '') +steps = {} +starting_indexes = df[(df['event_filename'].str.contains('starting_')) & (df['event_name'] == 'CREATE')].index.to_list() + [df.shape[0]] +ending_indexes = [0] + df[(df['event_filename'].str.contains('starting_')) & (df['event_name'] == 'DELETE')].index.to_list() +starting_df = df[df['event_filename'].str.contains('starting_')] +touch_file_names = ['setup'] + [x.replace('starting_', '') for x in starting_df['event_filename'].value_counts().index.to_list()] +for starting_index, ending_index, touch_file_name in zip(starting_indexes, ending_indexes, touch_file_names): + if touch_file_name == 'setup': continue + steps[touch_file_name] = (ending_index, starting_index) +touch_file_names.pop(0) +df['watched_filename'] = df['watched_filename'] + df['event_filename'] +df.drop('event_filename', axis=1, inplace=True) +df.rename(columns={'watched_filename':'file_name'}, inplace=True) +modify_df = df[(df['event_name'] == 'MODIFY') | (df['event_name'] == 'CREATE')] +file_names = modify_df['file_name'].value_counts().index.to_list() +info = [] +useful = [] + +for file_name in file_names: + last_access_step = '' + last_modify_step = '' + creation_step = '' + if df[(df['file_name'] == file_name) & (df['event_name'] == 'MODIFY')].shape[0] == 0: last_modify_index = -1; last_modify_step = 'Not provided' + else: last_modify_index = df[(df['file_name'] == file_name) & (df['event_name'] == 'MODIFY')].index.to_list()[-1] + if df[(df['file_name'] == file_name) & (df['event_name'] == 'ACCESS')].shape[0] == 0: last_access_index = -1; last_access_step = 'Not provided' + else: last_access_index = df[(df['file_name'] == file_name) & (df['event_name'] == 'ACCESS')].index.to_list()[-1] + if df[(df['file_name'] == file_name) & (df['event_name'] == 'CREATE')].shape[0] == 0: creation_index = -1; creation_step = 'Not provided' + else: creation_index = df[(df['file_name'] == file_name) & (df['event_name'] == 'CREATE')].index.to_list()[0] + + if last_access_index < last_modify_index: + for touch_file_name, (starting_index, ending_index) in steps.items(): + if (last_access_index > starting_index) and (last_access_index < ending_index): + last_access_step = touch_file_name.split('_')[1] + if (last_modify_index > starting_index) and (last_modify_index < ending_index): + last_modify_step = touch_file_name.split('_')[1] + if (creation_index > starting_index) and (creation_index < ending_index): + creation_step = touch_file_name.split('_')[1] + if f'/home/runner/work/GradleTestCI/GradleTestCI/.git/' not in file_name: + info.append({'file_name': file_name, 'last_access_index': last_access_index, 'last_modify_index': last_modify_index, 'creation_index': creation_index, 'last_access_step':last_access_step , 'last_modify_step':last_modify_step, 'creation_step': creation_step}) + + if last_access_index > last_modify_index: + for touch_file_name, (starting_index, ending_index) in steps.items(): + if (last_access_index > starting_index) and (last_access_index < ending_index): + last_access_step = touch_file_name.split('_')[1] + if (last_modify_index > starting_index) and (last_modify_index < ending_index): + last_modify_step = touch_file_name.split('_')[1] + if (creation_index > starting_index) and (creation_index < ending_index): + creation_step = touch_file_name.split('_')[1] + if f'/home/runner/work/GradleTestCI/GradleTestCI/.git/' not in file_name: + useful.append({'file_name': file_name, 'last_access_index': last_access_index, 'last_modify_index': last_modify_index, 'creation_index': creation_index, 'last_access_step':last_access_step , 'last_modify_step':last_modify_step, 'creation_step': creation_step}) + +os.mkdir(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis') +os.mkdir(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details') + +# Add the job.csv file to the directory optimizing-ci-builds-ci-analysis +current_run_id = os.environ['GITHUB_RUN_ID'] +job_id = os.environ['GITHUB_JOB'] +workflow = os.environ['GITHUB_WORKFLOW'] +save_path = f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv' +# write the current_run_id to the file job.csv +with open(save_path, 'w') as f: + f.write(current_run_id + ',' + job_id + ',' + f"GradleTestCI" + ',' + workflow) + +info_flag=0 +if ( len(info) > 0 ): + info_df = pd.DataFrame(info) + info_flag=1 +useful_df = pd.DataFrame(useful) +step_statistics = [] +print(info_flag) +if (info_flag == 1) : + for step, (starting_index, ending_index) in steps.items(): + step_name = step.split('_')[1] + if step_name == 'finished': continue + c = info_df['creation_step'] == step_name + m = info_df['last_modify_step'] == step_name + a = info_df['last_access_step'] == step_name + # _a is accessed in another step + # __a is never accessed + _a = (info_df['last_access_step'] != step_name) & (info_df['last_access_index'] != -1) + __a = info_df['last_access_index'] == -1 + cma = info_df[c & m & a].shape[0] + cm_a = info_df[c & m & _a].shape[0] + cm__a = info_df[c & m & __a].shape[0] + c_ma = info_df[c & ~m & a].shape[0] + c_m_a = info_df[c & ~m & _a].shape[0] + c_m__a = info_df[c & ~m & __a].shape[0] + _cma = info_df[~c & m & a].shape[0] + _cm_a = info_df[~c & m & _a].shape[0] + _cm__a = info_df[~c & m & __a].shape[0] + _c_ma = info_df[~c & ~m & a].shape[0] + _c_m_a = info_df[~c & ~m & _a].shape[0] + _c_m__a = info_df[~c & ~m & __a].shape[0] + created_file_count = info_df[c].shape[0] + modified_file_count = info_df[m].shape[0] + starting_time = list(map(int, df.iloc[starting_index]['time'].split(':'))) + if ending_index == len(df): ending_time = list(map(int, df.iloc[ending_index-1]['time'].split(':'))) + else: ending_time = list(map(int, df.iloc[ending_index]['time'].split(':'))) + hour = ending_time[0] - starting_time[0] + if starting_time[1] > ending_time[1]: + minute = ending_time[1] - starting_time[1] + 60 + hour -= 1 + else: minute = ending_time[1] - starting_time[1] + if starting_time[2] > ending_time[2]: + second = ending_time[2] - starting_time[2] + 60 + minute -= 1 + else: second = ending_time[2] - starting_time[2] + total_seconds = second + (minute * 60) + (hour * 60 * 60) + if step_name != '': + if not os.path.exists(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}'): + os.mkdir(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}') + if created_file_count > 0: info_df[c]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/c.csv') + if modified_file_count > 0: info_df[m]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/m.csv') + if cma > 0: info_df[c & m & a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/cma.csv') + if cm_a > 0: info_df[c & m & _a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/cm_a.csv') + if cm__a > 0: info_df[c & m & __a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/cm__a.csv') + if c_ma > 0: info_df[c & ~m & a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/c_ma.csv') + if c_m_a > 0: info_df[c & ~m & _a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/c_m_a.csv') + if c_m__a > 0: info_df[c & ~m & __a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/c_m__a.csv') + if _cma > 0: info_df[~c & m & a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/_cma.csv') + if _cm_a > 0: info_df[~c & m & _a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/_cm_a.csv') + if _cm__a > 0: info_df[~c & m & __a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/_cm__a.csv') + if _c_ma > 0: info_df[~c & ~m & a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/_c_ma.csv') + if _c_m_a > 0: info_df[~c & ~m & _a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/_c_m_a.csv') + if _c_m__a > 0: info_df[~c & ~m & __a]["file_name"].to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details/{step_name}/_c_m__a.csv') + step_statistics.append({'step_name': step_name, '#c': created_file_count, '#m': modified_file_count, + 'cma': cma, 'cm_a': cm_a, 'cm__a': cm__a, 'c_ma': c_ma, 'c_m_a': c_m_a, 'c_m__a': c_m__a, '_cma': _cma, '_cm_a': _cm_a, '_cm__a': _cm__a, '_c_ma': _c_ma, '_c_m_a': _c_m_a, '_c_m__a': _c_m__a, 'time': total_seconds}) + step_df = pd.DataFrame(step_statistics) + step_df.to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/steps.csv') + info_df.to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/files.csv') + useful_df.to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/useful.csv') + directories = show_directories('/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/files.csv') + string_version = '' + for line in directories: + string_version += line + '\n' + with open("/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/directories.txt", "w+", encoding="utf-8") as f: + f.write(string_version) +shutil.copy2("/home/runner/inotify-logs.csv", "/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/") + +size = 0 +Folderpath = "/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis" +for path, dirs, files in os.walk(Folderpath): + for f in files: + fp = os.path.join(path, f) + size += os.path.getsize(fp) +if size > 99000000: + shutil.make_archive("optimizing-ci-builds-ci-analysis", "zip", Folderpath) + shutil.rmtree(Folderpath) + os.mkdir(Folderpath) + shutil.move("/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis.zip", "/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis")