-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathapp.py
More file actions
72 lines (49 loc) · 1.82 KB
/
app.py
File metadata and controls
72 lines (49 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import json
import os
from collections import defaultdict
import requests
from flask import Flask,render_template, request
import time
from Corpus import NGramGenerator
from cosine_similarity import CosineSimilarity
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Build the n-gram index once, at import time, from "cleaned_files"
# (presumably a directory of pre-processed documents -- confirm against
# NGramGenerator.generateIndex).
myGenerator = NGramGenerator()
myGenerator.generateIndex("cleaned_files")
# Disabled debugging aid: dumps every unigram key and its docTermFreqDict entries.
# for k,v in myGenerator.one_gram_corpus.items():
#     print k
#     for ik, iv in v.docTermFreqDict.items():
#         print " %d => %d"%(ik,iv)

# Scorer used by the /search view; it is fed the unigram corpus plus the
# bi-gram and tri-gram indexes produced by the generator above.
cs = CosineSimilarity()
cs.createMatix(myGenerator.one_gram_corpus,myGenerator.bi_gram_index,myGenerator.tri_gram_index)
@app.route("/")
def main():
    """Serve the landing page by rendering the index template."""
    landing_page = render_template('/index.html')
    return landing_page
@app.route('/search', methods=['POST'])
def search():
    """Score the indexed documents against the posted query and return the best hits.

    Reads the ``query`` form field, lower-cases it, counts its
    whitespace-separated terms, asks ``cs.calculateSimilarity`` for a score
    per document and turns the highest-scoring document ids into Wikipedia
    URLs.

    Returns:
        A JSON string with two keys: ``totaltime`` (elapsed wall-clock time
        formatted as ``"<seconds> seconds"``) and ``urls`` (at most ten URLs,
        highest score first).

    Raises:
        KeyError: if the form has no ``query`` field (Flask's form mapping
            raises a KeyError subclass that it renders as a 400 response).
    """
    # The previous counter-based loop tested the limit only after appending,
    # so it leaked an eleventh result; slicing enforces the intended top ten.
    max_results = 10
    now = time.time()

    # read the posted values from the UI
    _query = request.form['query']
    # Single-argument print(...) prints the same on Python 2 and also parses
    # on Python 3.
    print(_query)

    # term -> number of occurrences in the query
    query_word_and_tf = defaultdict(int)
    for word in _query.lower().split():
        query_word_and_tf[word] += 1

    # get each document score for the given query
    doc_and_score_dict = cs.calculateSimilarity(query_word_and_tf, len(query_word_and_tf))

    # sort the documents in descending order of their score
    sortedDocIds = sorted(doc_and_score_dict.items(), key=lambda t: t[1], reverse=True)

    resultArray = []
    for doc_id, _score in sortedDocIds[:max_results]:
        # NOTE(review): the last 16 characters of the document id are dropped
        # before building the page title -- presumably a fixed-length file
        # name suffix; confirm against the indexer.
        url = "https://en.wikipedia.org/wiki/" + '_'.join(doc_id[:-16].split())
        resultArray.append(url)
        print(url)

    return json.dumps({'totaltime': str(time.time() - now) + " seconds", 'urls': resultArray})
def application(env, start_response):
    """Minimal WSGI callable that answers every request with "Hello!".

    Args:
        env: WSGI environ mapping; not inspected.
        start_response: WSGI callback, invoked once with the status line and
            the list of response headers.

    Returns:
        A one-element list holding the response body as a byte string.
    """
    start_response('200 OK', [('Content-Type', 'text/html')])
    # PEP 3333 requires body chunks to be bytes. On Python 2 a b"..." literal
    # is an ordinary str, so behaviour there is unchanged; on Python 3 a
    # native str body would be rejected by WSGI servers.
    return [b"Hello!"]
if __name__ == "__main__":
    # Started directly as a script: listen on every interface, taking the
    # port from the PORT environment variable (5000 when it is unset).
    listen_port = int(os.getenv('PORT', 5000))
    app.run(port=listen_port, host='0.0.0.0')