forked from rahulkumaran/Hack-In-The-North
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoutcome.py
More file actions
76 lines (57 loc) · 3.14 KB
/
outcome.py
File metadata and controls
76 lines (57 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split
pd.options.mode.chained_assignment = None
import json
from sklearn.externals import joblib
# import statsmodels.formula.api as smf
import sklearn.metrics as sm
from math import ceil
from sklearn import linear_model
def predict_outcome(recipient_dic):
    """Train a random-forest classifier on depression.csv and predict the
    depression outcome for one recipient.

    Parameters
    ----------
    recipient_dic : dict
        Expected keys (assumed from usage below -- TODO confirm against caller):
        'age' (int-convertible), 'gender' ('male' or anything else, treated
        as non-male), and 'how_long' (int-convertible, used as AcuteT).

    Returns
    -------
    str
        Human-readable summary containing the predicted outcome and the
        model's held-out test-set accuracy.

    Side effects: reads ``depression.csv`` from the working directory and
    writes the trained model to ``models/Outcome_Model``.
    """
    data_input = pd.read_csv("depression.csv")
    # Feature columns, in the exact order the prediction vector below must
    # match: Time, Age, Gender, AcuteT.
    data = data_input[['Time', 'Age', 'Gender', 'AcuteT']]
    # Use a 1-D Series as the target; the original (n, 1) DataFrame triggers
    # sklearn's DataConversionWarning on fit().
    expected_output = data_input['Outcome'].values.ravel()
    # Hold out 29% of the rows to measure accuracy on unseen data.
    input_train, input_test, expected_op_train, expected_op_test = train_test_split(
        data, expected_output, test_size=0.29, random_state=10000)
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(input_train, expected_op_train)
    accuracy = rf.score(input_test, expected_op_test)
    # Persist the trained model as "Outcome_Model". (The original code
    # reloaded the model immediately after dumping it, which is redundant:
    # `rf` is already the fitted model.)
    joblib.dump(rf, "models/Outcome_Model", compress=9)
    # Fixed Time value used for every prediction -- presumably a dataset
    # convention; verify against the training data's Time column.
    time = 100
    age = int(recipient_dic['age'])
    # The dataset encodes gender numerically: 2 = male, 1 = anything else.
    gender = 2 if recipient_dic['gender'] == 'male' else 1
    acutet = int(recipient_dic['how_long'])
    outcome = rf.predict([[time, age, gender, acutet]])
    string_temp = "Based on the analysis, the depression metrics are \n 1. The depression is " + str(outcome[0]) + "\n 2. This result is with an accuracy of " + str(accuracy * 100) + "%"
    print(string_temp)
    return string_temp
def predict_treatment():
    """Fit a linear-regression model mapping patient features to a numeric
    treatment code and persist it as ``models/Treatment_Model``.

    Treatment encoding: 3 = Lithium, 2 = Imipramine, 1 = Placebo.

    Returns
    -------
    int
        The ceil-rounded treatment code predicted for the hard-coded sample
        patient [Outcome=1, Time=32.6, AcuteT=284, Age=48, Gender=2].
        (The original computed this value but returned None; returning it
        is backward-compatible for callers that ignored the result.)

    Side effects: reads ``depression.csv`` from the working directory and
    writes the fitted model to ``models/Treatment_Model``.
    """
    data_input = pd.read_csv("depression.csv")
    # Encode the categorical columns numerically so the regression can use them.
    data_input['Treat'].replace('Lithium', 3, inplace=True)
    data_input['Treat'].replace('Imipramine', 2, inplace=True)
    data_input['Treat'].replace('Placebo', 1, inplace=True)
    data_input['Outcome'] = np.where(data_input['Outcome'] == 'Recurrence', 1, 0)
    data = data_input[['Outcome', 'Time', 'AcuteT', 'Age', 'Gender']]
    expected_output = data_input[['Treat']]
    lm = linear_model.LinearRegression()
    model = lm.fit(data, expected_output)
    # predict() returns a (1, 1) array because y was a one-column DataFrame;
    # extract the scalar explicitly instead of relying on math.ceil's
    # implicit conversion of a size-1 ndarray (deprecated, removed in
    # modern NumPy). ceil is used because the regression tends to
    # under-predict the integer treatment code.
    treat = ceil(float(lm.predict([[1, 32.6, 284, 48, 2]])[0][0]))
    # Persist the fitted model as "Treatment_Model".
    joblib.dump(model, "models/Treatment_Model", compress=9)
    return treat