diff --git a/decision_tree_ensembles/tree_ensemble_example.ipynb b/decision_tree_ensembles/tree_ensemble_example.ipynb
index 50d5ede..e402448 100644
--- a/decision_tree_ensembles/tree_ensemble_example.ipynb
+++ b/decision_tree_ensembles/tree_ensemble_example.ipynb
@@ -203,29 +203,15 @@
"cell_type": "code",
"execution_count": 10,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "clf = DecisionTreeClassifier(random_state=0)\n",
- "clf = RandomForestClassifier(random_state=0)\n",
- "clf = AdaBoostClassifier(random_state=0)\n",
- "clf = GradientBoostingClassifier(random_state=0)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"clf_list = []\n",
- "model_name_list = ['DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier']\n",
+ "model_name_list = [DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier]\n",
"\n",
"# train model\n",
"for model_name in model_name_list:\n",
"\n",
- " # dynamically write command to train each classifier in model_name_list\n",
- " cmd = 'clf = ' + model_name + '(random_state=0)'\n",
- " print(cmd) # print command\n",
- " exec(cmd) # execute command\n",
+ " clf = model_name(random_state=0)\n",
" \n",
" # fit data to model\n",
" clf = clf.fit(X_train, y_train)\n",
@@ -241,12 +227,7 @@
"outputs": [
{
"data": {
- "text/plain": [
- "[DecisionTreeClassifier(random_state=0),\n",
- " RandomForestClassifier(random_state=0),\n",
- " AdaBoostClassifier(random_state=0),\n",
- " GradientBoostingClassifier(random_state=0)]"
- ]
+ "text/plain": "[DecisionTreeClassifier(random_state=0),\n RandomForestClassifier(random_state=0),\n AdaBoostClassifier(random_state=0),\n GradientBoostingClassifier(random_state=0)]"
},
"execution_count": 11,
"metadata": {},
@@ -268,86 +249,34 @@
"cell_type": "code",
"execution_count": 12,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "y_pred_train = clf.predict(X_train)\n",
- "prec_val_list.append(precision_score(y_train, y_pred_train))\n",
- "prec_val_list.append(recall_score(y_train, y_pred_train))\n",
- "prec_val_list.append(f1_score(y_train, y_pred_train))\n",
- "\n",
- "y_pred_test = clf.predict(X_test)\n",
- "prec_val_list.append(precision_score(y_test, y_pred_test))\n",
- "prec_val_list.append(recall_score(y_test, y_pred_test))\n",
- "prec_val_list.append(f1_score(y_test, y_pred_test))\n",
- "\n",
- "y_pred_train = clf.predict(X_train)\n",
- "prec_val_list.append(precision_score(y_train, y_pred_train))\n",
- "prec_val_list.append(recall_score(y_train, y_pred_train))\n",
- "prec_val_list.append(f1_score(y_train, y_pred_train))\n",
- "\n",
- "y_pred_test = clf.predict(X_test)\n",
- "prec_val_list.append(precision_score(y_test, y_pred_test))\n",
- "prec_val_list.append(recall_score(y_test, y_pred_test))\n",
- "prec_val_list.append(f1_score(y_test, y_pred_test))\n",
- "\n",
- "y_pred_train = clf.predict(X_train)\n",
- "prec_val_list.append(precision_score(y_train, y_pred_train))\n",
- "prec_val_list.append(recall_score(y_train, y_pred_train))\n",
- "prec_val_list.append(f1_score(y_train, y_pred_train))\n",
- "\n",
- "y_pred_test = clf.predict(X_test)\n",
- "prec_val_list.append(precision_score(y_test, y_pred_test))\n",
- "prec_val_list.append(recall_score(y_test, y_pred_test))\n",
- "prec_val_list.append(f1_score(y_test, y_pred_test))\n",
- "\n",
- "y_pred_train = clf.predict(X_train)\n",
- "prec_val_list.append(precision_score(y_train, y_pred_train))\n",
- "prec_val_list.append(recall_score(y_train, y_pred_train))\n",
- "prec_val_list.append(f1_score(y_train, y_pred_train))\n",
- "\n",
- "y_pred_test = clf.predict(X_test)\n",
- "prec_val_list.append(precision_score(y_test, y_pred_test))\n",
- "prec_val_list.append(recall_score(y_test, y_pred_test))\n",
- "prec_val_list.append(f1_score(y_test, y_pred_test))\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"performance_dict = {}\n",
"\n",
- "dataset_name_list = ['train', 'test']\n",
- "metric_name_list = ['precision', 'recall', 'f1']\n",
+ "dataset_list = [  # one (features, labels, column suffix) triple per evaluated split\n",
+ " (X_train, y_train, \"train\"),\n",
+ " (X_test, y_test, \"test\"),\n",
+ "]\n",
+ "metric_name_list = {precision_score: \"precision\", recall_score: \"recall\", f1_score: \"f1\"}  # a dict despite the name: scorer -> column prefix; insertion order sets the column order\n",
+ "col_name_list = []  # keeps the name defined for the DataFrame call below when model_name_list is empty\n",
"\n",
"for i in range(len(model_name_list)):\n",
- " \n",
+ "\n",
" clf = clf_list[i]\n",
- " \n",
+ "\n",
" prec_val_list = []\n",
" col_name_list = []\n",
- " \n",
- " for dataset_name in dataset_name_list:\n",
"\n",
- " cmd = 'y_pred_' + dataset_name + ' = clf.predict(X_' + dataset_name + ')'\n",
- " print(cmd)\n",
- " exec(cmd)\n",
- " \n",
- " for metric_name in metric_name_list:\n",
- " \n",
- " cmd = 'prec_val_list.append(' + metric_name + '_score(y_' + dataset_name + ', y_pred_' + dataset_name + '))'\n",
- " print(cmd)\n",
- " exec(cmd)\n",
+ " for x_dataset, y_dataset, col_name in dataset_list:\n",
+ " y_pred = clf.predict(x_dataset)  # predict once per split; every metric below reuses it\n",
"\n",
+ " for metric_func, pretty_name in metric_name_list.items():\n",
+ " prec_val_list.append(metric_func(y_dataset, y_pred))\n",
+ " col_name_list.append(pretty_name + '_' + col_name)  # e.g. 'precision_train'; rebuilt identically for every model\n",
"\n",
- " col_name_list.append(metric_name + '_' + dataset_name)\n",
- " print('')\n",
+ " performance_dict[type(clf).__name__] = prec_val_list  # label the row from the estimator that was actually scored\n",
"\n",
- " performance_dict[model_name_list[i]] = prec_val_list\n",
- " \n",
- "df_performance = np.round(pd.DataFrame.from_dict(performance_dict, orient='index', columns=col_name_list),3)"
+ "df_performance = np.round(pd.DataFrame.from_dict(performance_dict, orient='index', columns=col_name_list),3)\n"
]
},
{
@@ -357,87 +286,8 @@
"outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " precision_train | \n",
- " recall_train | \n",
- " f1_train | \n",
- " precision_test | \n",
- " recall_test | \n",
- " f1_test | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | DecisionTreeClassifier | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.952 | \n",
- " 0.896 | \n",
- " 0.923 | \n",
- "
\n",
- " \n",
- " | RandomForestClassifier | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.985 | \n",
- " 0.955 | \n",
- " 0.970 | \n",
- "
\n",
- " \n",
- " | AdaBoostClassifier | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.956 | \n",
- " 0.970 | \n",
- " 0.963 | \n",
- "
\n",
- " \n",
- " | GradientBoostingClassifier | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.970 | \n",
- " 0.970 | \n",
- " 0.970 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " precision_train recall_train f1_train \\\n",
- "DecisionTreeClassifier 1.0 1.0 1.0 \n",
- "RandomForestClassifier 1.0 1.0 1.0 \n",
- "AdaBoostClassifier 1.0 1.0 1.0 \n",
- "GradientBoostingClassifier 1.0 1.0 1.0 \n",
- "\n",
- " precision_test recall_test f1_test \n",
- "DecisionTreeClassifier 0.952 0.896 0.923 \n",
- "RandomForestClassifier 0.985 0.955 0.970 \n",
- "AdaBoostClassifier 0.956 0.970 0.963 \n",
- "GradientBoostingClassifier 0.970 0.970 0.970 "
- ]
+ "text/plain": " precision_train recall_train f1_train \\\nDecisionTreeClassifier 1.0 1.0 1.0 \nRandomForestClassifier 1.0 1.0 1.0 \nAdaBoostClassifier 1.0 1.0 1.0 \nGradientBoostingClassifier 1.0 1.0 1.0 \n\n precision_test recall_test f1_test \nDecisionTreeClassifier 0.952 0.896 0.923 \nRandomForestClassifier 0.985 0.955 0.970 \nAdaBoostClassifier 0.956 0.970 0.963 \nGradientBoostingClassifier 0.970 0.970 0.970 ",
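+ "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>precision_train</th>\n      <th>recall_train</th>\n      <th>f1_train</th>\n      <th>precision_test</th>\n      <th>recall_test</th>\n      <th>f1_test</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>DecisionTreeClassifier</th>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>0.952</td>\n      <td>0.896</td>\n      <td>0.923</td>\n    </tr>\n    <tr>\n      <th>RandomForestClassifier</th>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>0.985</td>\n      <td>0.955</td>\n      <td>0.970</td>\n    </tr>\n    <tr>\n      <th>AdaBoostClassifier</th>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>0.956</td>\n      <td>0.970</td>\n      <td>0.963</td>\n    </tr>\n    <tr>\n      <th>GradientBoostingClassifier</th>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>0.970</td>\n      <td>0.970</td>\n      <td>0.970</td>\n    </tr>\n  </tbody>\n</table>\n</div>"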
},
"execution_count": 13,
"metadata": {},