-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_kb.py
More file actions
135 lines (117 loc) · 5.3 KB
/
generate_kb.py
File metadata and controls
135 lines (117 loc) · 5.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import subprocess
# Name of the knowledge-base file that the script writes.
output_kb_file = '1gency-app.kb'

# Paths that must never be packed; pack_project_to_kb compares each
# candidate file path against these entries.
excluded_files = [
    './client/package-lock.json',
    './client/yarn.lock',
]

# Entries handed to pack_project_to_kb. Only the files directly inside each
# directory are considered, so nested folders are listed one by one.
directories_to_pack = [
    # --- backend ---
    'backend',
    'backend/scripts',
    # NOTE(review): the next two entries look like files, not directories;
    # pack_project_to_kb only accepts directories, so they would be reported
    # as "Directory not found" -- confirm whether they are intentional.
    'backend/scripts/recreate_db.sh',
    'backend/scripts/import_skills.sh',
    'backend/src',
    'backend/src/database',
    'backend/src/parser',
    'backend/src/templates',
    'backend/src/application',
    # --- client ---
    'client',
    'client/public',
    'client/src',
    'client/src/forms',
    'client/src/forms/mixins',
    'client/src/forms/auto',
    'client/src/forms/ext',
    'client/src/forms/shared',
    'client/src/utils',
    'client/src/views',
    'client/src/views/mixins',
    'client/src/views/auto',
    'client/src/views/ext',
    'client/src/views/shared',
    'client/src/views/components',
    'client/src/assets',
    'client/src/store',
    'client/src/store/modules',
    'client/src/router',
]
# Heuristic binary detector based on the leading bytes of a file
def is_binary(file_path):
    """Report whether *file_path* looks like a binary file.

    The first 8000 bytes are read in binary mode; the file is considered
    binary when that prefix contains a NUL byte.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if a NUL byte is found in the prefix, or if the file could not
        be read (the error is printed and the file is treated as binary);
        False otherwise.
    """
    try:
        with open(file_path, 'rb') as stream:
            head = stream.read(8000)
    except Exception as err:
        # Best effort: an unreadable file is reported and handled as binary
        # so that the caller skips it.
        print(f"Error while checking binary for {file_path}: {str(err)}")
        return True
    return head.find(b'\x00') != -1
# Function to check if a file is tracked by Git
def is_git_tracked(file_path):
    """Return whether *file_path* is tracked by Git in the current directory.

    The path is made relative to the current working directory and handed
    to ``git ls-files``; the file counts as tracked only when Git lists
    exactly that path.

    Args:
        file_path: Path of the file to check.

    Returns:
        True when Git lists the file. False when it does not, when the
        current directory is not a Git work tree, or when Git could not be
        executed at all (the error is printed).
    """
    # Git always prints forward slashes, so normalise the OS-specific
    # separator before comparing against its output.
    relative_file_path = os.path.relpath(file_path, './').replace(os.sep, '/')
    try:
        # -z : NUL-terminated, unquoted names, so paths with spaces or
        #      non-ASCII characters come back verbatim.
        # -- : end of options, so a name starting with '-' is a path.
        result = subprocess.run(
            ['git', 'ls-files', '-z', '--', relative_file_path],
            capture_output=True,
            cwd='./',
        )
    except (OSError, subprocess.SubprocessError) as e:
        # A missing git executable raises FileNotFoundError (an OSError),
        # which is not part of the SubprocessError hierarchy.
        print(f"Error checking Git for {file_path}: {str(e)}")
        return False
    # Compare raw bytes so the result does not depend on the locale used
    # to decode Git's output.
    return os.fsencode(relative_file_path) in result.stdout.split(b'\0')
def pack_project_to_kb(source_dir, output_file, dirs_list):
    """Concatenate the Git-tracked text files of selected directories.

    For every entry of *dirs_list* that is an existing directory under
    *source_dir*, the directory path is written to *output_file*, followed
    by each regular file directly inside it (no recursion) that is not
    listed in ``excluded_files``, is tracked by Git and is not binary.
    Each packed file contributes its path, its content (decoded as UTF-8,
    undecodable bytes dropped) and a separator line of 55 dashes.

    Progress messages and summary statistics (file count, output size,
    per-extension counts, 20 largest inputs) are printed to stdout.

    Args:
        source_dir: Base directory that the entries of *dirs_list* are
            joined onto.
        output_file: Path of the file to create (overwritten if present).
        dirs_list: Directory names, relative to *source_dir*.

    Returns:
        None.
    """
    added_files = []  # Paths of the files written, in output order
    extension_stats = {}  # Extension -> number of packed files
    # The output is always UTF-8, matching how the inputs are decoded, so
    # the result does not depend on the platform's default encoding.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for dir_name in dirs_list:
            dir_path = os.path.join(source_dir, dir_name)
            if not os.path.isdir(dir_path):
                print(f"Directory not found: {dir_path}")
                continue
            outfile.write(dir_path + '\n')
            print(f"Directory path: {dir_path}")
            # os.listdir returns entries in arbitrary order; sorting keeps
            # the generated file reproducible between runs.
            for file_name in sorted(os.listdir(dir_path)):
                file_path = os.path.join(dir_path, file_name)
                if not os.path.isfile(file_path):
                    continue
                print(f"Checking file: {file_path}")
                if _is_excluded_path(file_path):
                    print(f"Excluded file: {file_path}")
                    continue
                if not is_git_tracked(file_path):
                    print(f"Excluded non-Git file: {file_path}")
                    continue
                if is_binary(file_path):
                    print(f"File is binary: {file_path}")
                    continue
                outfile.write(file_path + '\n')
                added_files.append(file_path)
                print(f"Writing file: {file_path}")
                ext = os.path.splitext(file_path)[1]
                extension_stats[ext] = extension_stats.get(ext, 0) + 1
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as infile:
                    outfile.write(infile.read())
                outfile.write('\n' + '-' * 55 + '\n')
                print("Separator line written")

    def separator():
        print('\n' + '-' * 55 + '\n')

    separator()
    # Measured after the output is closed, so the figure is the real size
    # of the resulting file (headers and separators included).
    num_files_added = len(added_files)
    total_size_kb = os.path.getsize(output_file) / 1024
    print(f"Number of files added: {num_files_added}")
    print(f"Total size of resulting file (KB): {total_size_kb:.2f}")
    separator()
    print("File extension statistics:")
    for ext, count in extension_stats.items():
        print(f"{ext if ext else '(no extension)'}: {count} file(s)")
    separator()
    # Read each size once and reuse it for both sorting and printing.
    sizes = {path: os.path.getsize(path) for path in added_files}
    top_20_files = sorted(added_files, key=sizes.get, reverse=True)[:20]
    print("Top 20 files by size (in kilobytes):")
    for file_path in top_20_files:
        print(f"{file_path} - {sizes[file_path] / 1024:.2f} KB")


def _is_excluded_path(file_path):
    """Return True if *file_path* names one of the ``excluded_files``."""
    # Compare normalised paths so './a/b', 'a/b' and OS-specific
    # separators all refer to the same entry.
    candidate = os.path.normpath(file_path)
    return any(candidate == os.path.normpath(entry) for entry in excluded_files)
# Script entry: pack the configured directories, resolved against the
# current working directory, into the knowledge-base file.
source_directory = './'
pack_project_to_kb(
    source_dir=source_directory,
    output_file=output_kb_file,
    dirs_list=directories_to_pack,
)