-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest.py
More file actions
73 lines (51 loc) · 2.23 KB
/
test.py
File metadata and controls
73 lines (51 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import naivebayes
import xml.etree.ElementTree as ET
import nltk
# Parse the annotation file first so a missing/invalid XML fails before the
# text file is touched.
tree = ET.parse('Alprazolam_ddi.xml')
root = tree.getroot()

# Read the whitespace-separated tokens; the file is closed as soon as they
# are in memory.
with open('edited_testfile.txt') as f:
    words = f.read().split()

# ``text`` attribute of every ``entity`` element found directly under each
# child of the XML root (presumably the annotated drug mentions).
drugs = [
    subelem.get('text')
    for elem in root
    for subelem in elem.findall('entity')
]
# Set copy for O(1) membership tests in the labelling pass below.
drug_lookup = set(drugs)

# Label each token "N" when it exactly equals one of the entity texts,
# otherwise "I".  Tokens produced by ``str.split()`` are non-empty and contain
# no whitespace, so no further splitting or empty-string filtering is needed.
# NOTE(review): because tokens are single words, an entity text that contains
# a space can never match here - confirm whether multi-word entities matter.
texts = [(word, "N" if word in drug_lookup else "I") for word in words]

# NOTE(review): all tokens are stored as ONE document, so ``docs`` has exactly
# one entry.  A 70/30 split over a single document leaves the training portion
# empty - confirm this is the intended document granularity.
docs = [texts]
# Maximum number of training items handed to each trainer.
TRAIN_LIMIT = 30000
# Number of test sequences scored with the HMM.
HMM_TEST_LIMIT = 100

# First 70% of the documents are used for training, the rest for testing.
size = int(len(docs) * 0.7)
train_sents = docs[:size]
test_sents = docs[size:]

# Build the tag inventory from every labelled token.  Using only the first
# token (as before) produced a one-entry ``tagsDict``, so any other label hit
# a KeyError when mapped to an index below, and ``defaultTag`` was simply the
# first token's tag rather than the most frequent one.
tags = [tag for sent in docs for (word, tag) in sent]
defaultTag = nltk.FreqDist(tags).max()
tagsDict = {tag: index for index, tag in enumerate(set(tags))}

# --- Sequence features (shared by the HMM and the CRF) ----------------------
trainSeqFeatures, trainSeqLabels = naivebayes.transformDatasetSequence(train_sents)
testSeqFeatures, testSeqLabels = naivebayes.transformDatasetSequence(test_sents)

# --- HMM ---------------------------------------------------------------------
tagProbs, startProbs, transMat, emissionMat, featuresDict = naivebayes.trainHMM(
    trainSeqFeatures[:TRAIN_LIMIT], trainSeqLabels[:TRAIN_LIMIT], tagsDict)
predictedTags = naivebayes.predictTags(
    testSeqFeatures[:HMM_TEST_LIMIT], tagProbs, startProbs, transMat,
    emissionMat, tagsDict, featuresDict)
# Score against the same slice of sequences that was predicted, with the gold
# labels mapped to the indices stored in ``tagsDict``.
hmmGoldLabels = [[tagsDict[tag] for tag in seq]
                 for seq in testSeqLabels[:HMM_TEST_LIMIT]]
print(naivebayes.computeSeqAccuracy(predictedTags, hmmGoldLabels))

# --- N-gram tagger (n = 2) with ``defaultTag`` passed as its default ---------
tagger = naivebayes.ngramTagger(train_sents, 2, defaultTag)
print(tagger.evaluate(test_sents))

# --- Per-token classifiers -----------------------------------------------------
# Each trainer returns (model, cross-validation score); the CV score is printed
# first, then the model's score on the test features.
trainFeatures, trainLabels = naivebayes.transformDataset(train_sents)
testFeatures, testLabels = naivebayes.transformDataset(test_sents)

tree_model, tree_model_cv_score = naivebayes.trainDecisionTree(
    trainFeatures[:TRAIN_LIMIT], trainLabels[:TRAIN_LIMIT])
print(tree_model_cv_score)
print(tree_model.score(testFeatures, testLabels))

nb_model, nb_model_cv_score = naivebayes.trainNaiveBayes(
    trainFeatures[:TRAIN_LIMIT], trainLabels[:TRAIN_LIMIT])
print(nb_model_cv_score)
print(nb_model.score(testFeatures, testLabels))

nn_model, nn_model_cv_score = naivebayes.trainNN(
    trainFeatures[:TRAIN_LIMIT], trainLabels[:TRAIN_LIMIT])
print(nn_model_cv_score)
print(nn_model.score(testFeatures, testLabels))

# --- CRF -----------------------------------------------------------------------
# Predictions are compared with ``testSeqLabels`` directly (no index mapping).
crf_model = naivebayes.trainCRF(trainSeqFeatures[:TRAIN_LIMIT], trainSeqLabels[:TRAIN_LIMIT])
pred_labels = crf_model.predict(testSeqFeatures)
print(naivebayes.computeSeqAccuracy(pred_labels, testSeqLabels))