Skip to content

Commit 58fe34e

Browse files
authored
Merge pull request #7 from EwenQuim/reduce-memory-usage
reduce memory usage
2 parents a64e5a9 + c23faa5 commit 58fe34e

File tree

2 files changed

+81
-94
lines changed

2 files changed

+81
-94
lines changed

main.go

Lines changed: 53 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,44 @@ type Entropy struct {
3535
Line string // Line with high entropy
3636
}
3737

38+
func NewEntropies(n int) *Entropies {
39+
return &Entropies{
40+
Entropies: make([]Entropy, n),
41+
}
42+
}
43+
44+
// Entropies should be created with a size n using make()
45+
// it should not be written to manually, instead use Entropies.Add
46+
type Entropies struct {
47+
mu sync.Mutex
48+
Entropies []Entropy
49+
}
50+
51+
// Add assumes that es contains an ordered set of entropies.
52+
// It preserves ordering, and inserts an additional value e, if it has high enough entropy.
53+
// In that case, the entry with lowest entropy is rejected.
54+
func (es *Entropies) Add(e Entropy) {
55+
es.mu.Lock()
56+
defer es.mu.Unlock()
57+
58+
if es.Entropies[len(es.Entropies)-1].Entropy >= e.Entropy {
59+
return
60+
}
61+
62+
i, _ := slices.BinarySearchFunc(es.Entropies, e, func(a, b Entropy) int {
63+
if b.Entropy > a.Entropy {
64+
return 1
65+
}
66+
if a.Entropy > b.Entropy {
67+
return -1
68+
}
69+
return 0
70+
})
71+
72+
copy(es.Entropies[i+1:], es.Entropies[i:])
73+
es.Entropies[i] = e
74+
}
75+
3876
func main() {
3977
minCharactersFlag := flag.Int("min", minCharactersDefault, "Minimum number of characters in the line to consider computing entropy")
4078
resultCountFlag := flag.Int("top", resultCountDefault, "Number of results to display")
@@ -64,17 +102,14 @@ func main() {
64102
fmt.Println("No files provided, defaults to current folder.")
65103
fileNames = []string{"."}
66104
}
67-
entropies := make([]Entropy, 0, 10*len(fileNames))
105+
entropies := NewEntropies(resultCount)
68106
for _, fileName := range fileNames {
69-
fileEntropies, err := readFile(fileName)
107+
err := readFile(entropies, fileName)
70108
if err != nil {
71109
fmt.Fprintf(os.Stderr, "Error reading file %s: %v\n", fileName, err)
72110
}
73-
entropies = append(entropies, fileEntropies...)
74111
}
75112

76-
entropies = sortAndCutTop(entropies)
77-
78113
redMark := "\033[31m"
79114
resetMark := "\033[0m"
80115
if !term.IsTerminal(int(os.Stdout.Fd())) {
@@ -83,59 +118,54 @@ func main() {
83118
resetMark = ""
84119
}
85120

86-
for _, entropy := range entropies {
121+
for _, entropy := range entropies.Entropies {
122+
if entropy == (Entropy{}) {
123+
return
124+
}
87125
fmt.Printf("%.2f: %s%s:%d%s %s\n", entropy.Entropy, redMark, entropy.File, entropy.LineNum, resetMark, entropy.Line)
88126
}
89127
}
90128

91-
func readFile(fileName string) ([]Entropy, error) {
129+
func readFile(entropies *Entropies, fileName string) error {
92130
// If file is a folder, walk inside the folder
93131
fileInfo, err := os.Stat(fileName)
94132
if err != nil {
95-
return nil, err
133+
return err
96134
}
97135

98136
if isFileHidden(fileInfo.Name()) && !exploreHidden {
99-
return nil, nil
137+
return nil
100138
}
101139

102-
entropies := make([]Entropy, 0, 10)
103140
if fileInfo.IsDir() {
104141
// Walk through the folder and read all files
105142
dir, err := os.ReadDir(fileName)
106143
if err != nil {
107-
return nil, err
144+
return err
108145
}
109146

110-
entropiies := make([][]Entropy, len(dir))
111-
112147
var wg sync.WaitGroup
113148
for i, file := range dir {
114149
wg.Add(1)
115150
go func(i int, file os.DirEntry) {
116151
defer wg.Done()
117-
fileEntropies, err := readFile(fileName + "/" + file.Name())
152+
err := readFile(entropies, fileName+"/"+file.Name())
118153
if err != nil {
119154
fmt.Fprintf(os.Stderr, "Error reading file %s: %v\n", file.Name(), err)
120155
}
121-
entropiies[i] = fileEntropies
122156
}(i, file)
123157
}
124158

125159
wg.Wait()
126-
127-
for _, fileEntropies := range entropiies {
128-
entropies = append(entropies, fileEntropies...)
129-
}
130160
}
131161

132162
if !isFileIncluded(fileInfo.Name()) {
133-
return sortAndCutTop(entropies), nil
163+
return nil
134164
}
135165

136166
file, err := os.Open(fileName)
137167
if err != nil {
138-
return nil, err
168+
return err
139169
}
140170
defer file.Close()
141171

@@ -150,7 +180,7 @@ func readFile(fileName string) ([]Entropy, error) {
150180
continue
151181
}
152182

153-
entropies = append(entropies, Entropy{
183+
entropies.Add(Entropy{
154184
Entropy: entropy(field),
155185
File: fileName,
156186
LineNum: i,
@@ -159,7 +189,7 @@ func readFile(fileName string) ([]Entropy, error) {
159189
}
160190
}
161191

162-
return sortAndCutTop(entropies), nil
192+
return nil
163193
}
164194

165195
func entropy(text string) float64 {
@@ -210,15 +240,3 @@ func isFileIncluded(filename string) bool {
210240

211241
return false
212242
}
213-
214-
func sortAndCutTop(entropies []Entropy) []Entropy {
215-
slices.SortFunc(entropies, func(a, b Entropy) int {
216-
return int((b.Entropy - a.Entropy) * 10000)
217-
})
218-
219-
if len(entropies) > resultCount {
220-
return entropies[:resultCount]
221-
}
222-
223-
return entropies
224-
}

main_test.go

Lines changed: 28 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ import (
55
)
66

77
func BenchmarkFile(b *testing.B) {
8+
entropies := &Entropies{Entropies: make([]Entropy, 10)}
89
for range b.N {
9-
readFile("testdata")
10+
_ = readFile(entropies, "testdata")
1011
}
1112
}
1213

@@ -46,82 +47,37 @@ func TestEntropy(t *testing.T) {
4647

4748
func TestReadFile(t *testing.T) {
4849
t.Run("random.js", func(t *testing.T) {
49-
res, err := readFile("testdata/random.js")
50+
res := &Entropies{Entropies: make([]Entropy, 10)}
51+
err := readFile(res, "testdata/random.js")
5052
if err != nil {
5153
t.Errorf("expected nil, got %v", err)
5254
}
5355

54-
Expect(t, len(res), 10)
55-
ExpectFloat(t, res[0].Entropy, 5.53614242151549)
56-
Expect(t, res[0].LineNum, 7) // The token is hidden here
57-
ExpectFloat(t, res[4].Entropy, 3.321928094887362)
56+
ExpectFloat(t, res.Entropies[0].Entropy, 5.53614242151549)
57+
Expect(t, res.Entropies[0].LineNum, 7) // The token is hidden here
58+
ExpectFloat(t, res.Entropies[4].Entropy, 3.321928094887362)
5859
})
5960

6061
t.Run("testdata/folder", func(t *testing.T) {
61-
res, err := readFile("testdata/folder")
62+
res := &Entropies{Entropies: make([]Entropy, 10)}
63+
err := readFile(res, "testdata/folder")
6264
if err != nil {
6365
t.Errorf("expected nil, got %v", err)
6466
}
6567

66-
Expect(t, len(res), 10)
67-
ExpectFloat(t, res[0].Entropy, 3.7667029194153567)
68-
Expect(t, res[0].LineNum, 7) // The token is hidden here
69-
ExpectFloat(t, res[6].Entropy, 2.8553885422075336)
68+
ExpectFloat(t, res.Entropies[0].Entropy, 3.7667029194153567)
69+
Expect(t, res.Entropies[0].LineNum, 7) // The token is hidden here
70+
ExpectFloat(t, res.Entropies[6].Entropy, 2.8553885422075336)
7071
})
7172

7273
t.Run("dangling symlink in testdata folder", func(t *testing.T) {
73-
res, err := readFile("testdata")
74+
entropies := NewEntropies(10)
75+
err := readFile(entropies, "testdata")
7476
if err != nil {
7577
t.Errorf("expected nil, got %v", err)
7678
}
7779

78-
Expect(t, len(res), 10)
79-
})
80-
}
81-
82-
func TestSortAndCutTop(t *testing.T) {
83-
resultCount = 5
84-
85-
t.Run("nil", func(t *testing.T) {
86-
res := sortAndCutTop(nil)
87-
if len(res) != 0 {
88-
t.Errorf("expected 0, got %d", len(res))
89-
}
90-
})
91-
92-
t.Run("empty", func(t *testing.T) {
93-
res := sortAndCutTop([]Entropy{})
94-
if len(res) != 0 {
95-
t.Errorf("expected 0, got %d", len(res))
96-
}
97-
})
98-
99-
t.Run("less than resultCount", func(t *testing.T) {
100-
res := sortAndCutTop([]Entropy{
101-
{Entropy: 0.1},
102-
{Entropy: 0.6},
103-
{Entropy: 0.3},
104-
})
105-
106-
Expect(t, len(res), 3)
107-
Expect(t, res[0].Entropy, 0.6)
108-
Expect(t, res[2].Entropy, 0.1)
109-
})
110-
111-
t.Run("more than resultCount", func(t *testing.T) {
112-
res := sortAndCutTop([]Entropy{
113-
{Entropy: 0.1},
114-
{Entropy: 0.6},
115-
{Entropy: 0.3},
116-
{Entropy: 0.7},
117-
{Entropy: 0.4},
118-
{Entropy: 0.5},
119-
{Entropy: 0.2},
120-
})
121-
122-
Expect(t, len(res), 5)
123-
Expect(t, res[0].Entropy, 0.7)
124-
Expect(t, res[4].Entropy, 0.3)
80+
Expect(t, len(entropies.Entropies), 10)
12581
})
12682
}
12783

@@ -164,6 +120,19 @@ func TestIsFileHidden(t *testing.T) {
164120
Expect(t, isFileHidden(".env"), true)
165121
}
166122

123+
func TestEntropies(t *testing.T) {
124+
res := &Entropies{Entropies: make([]Entropy, 5)}
125+
for _, i := range []float64{1, 3, 5, 7, 2, 4, 6, 8} {
126+
res.Add(Entropy{Entropy: i})
127+
}
128+
129+
Expect(t, res.Entropies[0].Entropy, 8)
130+
Expect(t, res.Entropies[1].Entropy, 7)
131+
Expect(t, res.Entropies[2].Entropy, 6)
132+
Expect(t, res.Entropies[3].Entropy, 5)
133+
Expect(t, res.Entropies[4].Entropy, 4)
134+
}
135+
167136
func Expect[T comparable](t *testing.T, got, expected T) {
168137
t.Helper()
169138
if got != expected {

0 commit comments

Comments
 (0)