-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfit_test.py
More file actions
57 lines (48 loc) · 1.48 KB
/
fit_test.py
File metadata and controls
57 lines (48 loc) · 1.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import matplotlib.pyplot as plot
import numpy as np
import json
import os
import pandas
def divide_data(filesource='source/rmrb', gap=20, length=10000,
                output='rmrb.xlsx'):
    """Build a histogram of per-record comment counts and report on it.

    Every entry of the ``filesource`` directory is parsed as a JSON document
    that iterates over objects. The value stored under the ``'评论数量'``
    ("comment count") key of each object is assigned to the bucket
    ``int(value / gap)``; bucket ``i`` is labelled ``i * gap`` and there are
    ``int(length / gap) + 1`` buckets. Values that fall past the last bucket
    are only tallied as overflow.

    The histogram is written to ``output`` as an Excel sheet with the columns
    ``num_gap`` and ``num_of_each``, plotted with matplotlib, and summarised
    on stdout.

    A file that cannot be opened, decoded or interpreted is reported by
    printing its name; records read from it before the failure stay counted
    and the remaining files are still processed.

    Args:
        filesource: Directory whose entries are the JSON files to read.
        gap: Width of one histogram bucket.
        length: Upper end of the histogram range; fixes the number of
            buckets at ``int(length / gap) + 1``.
        output: Path of the Excel file to write. Defaults to the file name
            that used to be hard-coded.

    Returns:
        None. Results are delivered through the Excel file, the plot window
        and stdout.
    """
    limit = int(length / gap)      # index of the last bucket
    res = [0] * (limit + 1)        # records per bucket
    left = 0                       # records past the last bucket
    count = 0                      # every record encountered
    front = 0                      # records that landed in some bucket
    front_limit = 1                # leading buckets summarised at the end

    for filepath in os.listdir(filesource):
        try:
            with open(os.path.join(filesource, filepath),
                      encoding='utf-8') as f:
                records = json.load(f)
            for record in records:
                count += 1
                num = record.get('评论数量')
                if num < 0:
                    # A negative value would yield a negative index and
                    # silently increment a bucket counted from the end of
                    # ``res``; treat it like any other malformed record.
                    raise ValueError(
                        'negative comment count: {!r}'.format(num))
                index = int(num / gap)
                if index > limit:
                    left += 1
                else:
                    front += 1
                    res[index] += 1
        except (OSError, ValueError, TypeError, AttributeError,
                OverflowError):
            # Best effort: name the unreadable/malformed file and carry on.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            print(filepath)

    front_num = sum(res[:front_limit])
    x = [i * gap for i in range(limit + 1)]
    print(front_num)
    print(front)

    df = pandas.DataFrame({'num_gap': x, 'num_of_each': res})
    df.to_excel(output, index=False)
    plot.plot(x, res)
    plot.show()

    print('more than {}: {}'.format(length, left))
    print("total:{}".format(count))
    # With no records at all the share is reported as 0 instead of raising
    # ZeroDivisionError after the sheet and the plot were already produced.
    share = front_num / count * 100 if count else 0.0
    print("front {} make up for {:.4f}%".format(front_limit * gap, share))
if __name__ == '__main__':
    # Script entry point: histogram the files under source/xlxw using
    # buckets that are 100 wide (length keeps its default value).
    divide_data(filesource='source/xlxw', gap=100)