From 5557a0f5bf7bb78184ce96d4b2f5f34eca7f6a99 Mon Sep 17 00:00:00 2001 From: Tristan Eastburn Date: Fri, 22 Dec 2023 16:09:10 -0800 Subject: [PATCH 1/2] Update decision tree ensemble notebook to not use exec I appreciated the youtube series and wanted to give back with my expertise! This code doesn't need exec, and exec and eval are considered anti-patterns, with some security implications. https://stackoverflow.com/questions/1933451/why-should-exec-and-eval-be-avoided --- .../tree_ensemble_example.ipynb | 196 ++---------------- 1 file changed, 23 insertions(+), 173 deletions(-) diff --git a/decision_tree_ensembles/tree_ensemble_example.ipynb b/decision_tree_ensembles/tree_ensemble_example.ipynb index 50d5ede..be63b1d 100644 --- a/decision_tree_ensembles/tree_ensemble_example.ipynb +++ b/decision_tree_ensembles/tree_ensemble_example.ipynb @@ -203,29 +203,15 @@ "cell_type": "code", "execution_count": 10, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "clf = DecisionTreeClassifier(random_state=0)\n", - "clf = RandomForestClassifier(random_state=0)\n", - "clf = AdaBoostClassifier(random_state=0)\n", - "clf = GradientBoostingClassifier(random_state=0)\n" - ] - } - ], + "outputs": [], "source": [ "clf_list = []\n", - "model_name_list = ['DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier']\n", + "model_name_list = [DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier]\n", "\n", "# train model\n", "for model_name in model_name_list:\n", "\n", - " # dynamically write command to train each classifier in model_name_list\n", - " cmd = 'clf = ' + model_name + '(random_state=0)'\n", - " print(cmd) # print command\n", - " exec(cmd) # execute command\n", + " clf = model_name(random_state=0)\n", " \n", " # fit data to model\n", " clf = clf.fit(X_train, y_train)\n", @@ -241,12 +227,7 @@ "outputs": [ { "data": { - "text/plain": [ - "[DecisionTreeClassifier(random_state=0),\n", - " RandomForestClassifier(random_state=0),\n", - " AdaBoostClassifier(random_state=0),\n", - " GradientBoostingClassifier(random_state=0)]" - ] + "text/plain": "[DecisionTreeClassifier(random_state=0),\n RandomForestClassifier(random_state=0),\n AdaBoostClassifier(random_state=0),\n GradientBoostingClassifier(random_state=0)]" }, "execution_count": 11, "metadata": {}, @@ -268,176 +249,45 @@ "cell_type": "code", "execution_count": 12, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "y_pred_train = clf.predict(X_train)\n", - "prec_val_list.append(precision_score(y_train, y_pred_train))\n", - "prec_val_list.append(recall_score(y_train, y_pred_train))\n", - "prec_val_list.append(f1_score(y_train, y_pred_train))\n", - "\n", - "y_pred_test = clf.predict(X_test)\n", - "prec_val_list.append(precision_score(y_test, y_pred_test))\n", - "prec_val_list.append(recall_score(y_test, y_pred_test))\n", - "prec_val_list.append(f1_score(y_test, y_pred_test))\n", - "\n", - "y_pred_train = clf.predict(X_train)\n", - "prec_val_list.append(precision_score(y_train, y_pred_train))\n", - "prec_val_list.append(recall_score(y_train, y_pred_train))\n", - "prec_val_list.append(f1_score(y_train, y_pred_train))\n", - "\n", - "y_pred_test = clf.predict(X_test)\n", - "prec_val_list.append(precision_score(y_test, y_pred_test))\n", - "prec_val_list.append(recall_score(y_test, y_pred_test))\n", - "prec_val_list.append(f1_score(y_test, y_pred_test))\n", - "\n", - "y_pred_train = clf.predict(X_train)\n", - "prec_val_list.append(precision_score(y_train, y_pred_train))\n", - "prec_val_list.append(recall_score(y_train, y_pred_train))\n", - "prec_val_list.append(f1_score(y_train, y_pred_train))\n", - "\n", - "y_pred_test = clf.predict(X_test)\n", - "prec_val_list.append(precision_score(y_test, y_pred_test))\n", - "prec_val_list.append(recall_score(y_test, y_pred_test))\n", - "prec_val_list.append(f1_score(y_test, y_pred_test))\n", - "\n", - "y_pred_train = clf.predict(X_train)\n", - "prec_val_list.append(precision_score(y_train, y_pred_train))\n", - "prec_val_list.append(recall_score(y_train, y_pred_train))\n", - "prec_val_list.append(f1_score(y_train, y_pred_train))\n", - "\n", - "y_pred_test = clf.predict(X_test)\n", - "prec_val_list.append(precision_score(y_test, y_pred_test))\n", - "prec_val_list.append(recall_score(y_test, y_pred_test))\n", - "prec_val_list.append(f1_score(y_test, y_pred_test))\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "performance_dict = {}\n", "\n", - "dataset_name_list = ['train', 'test']\n", - "metric_name_list = ['precision', 'recall', 'f1']\n", + "dataset_list = [\n", + " (X_train, y_train, \"train\"),\n", + " (X_test, y_test, \"test\"),\n", + "]\n", + "metric_name_list = {precision_score: \"precision\", recall_score: \"recall\", f1_score: \"f1\"}\n", + "col_name_list = []\n", "\n", "for i in range(len(model_name_list)):\n", - " \n", + "\n", " clf = clf_list[i]\n", - " \n", + "\n", " prec_val_list = []\n", " col_name_list = []\n", - " \n", - " for dataset_name in dataset_name_list:\n", "\n", - " cmd = 'y_pred_' + dataset_name + ' = clf.predict(X_' + dataset_name + ')'\n", - " print(cmd)\n", - " exec(cmd)\n", - " \n", - " for metric_name in metric_name_list:\n", - " \n", - " cmd = 'prec_val_list.append(' + metric_name + '_score(y_' + dataset_name + ', y_pred_' + dataset_name + '))'\n", - " print(cmd)\n", - " exec(cmd)\n", + " for x_dataset, y_dataset, col_name in dataset_list:\n", + " y_pred = clf.predict(x_dataset)\n", "\n", + " for metric_func, pretty_name in metric_name_list.items():\n", + " prec_val_list.append(metric_func(y_dataset, y_pred))\n", + " col_name_list.append(pretty_name + '_' + col_name)\n", "\n", - " col_name_list.append(metric_name + '_' + dataset_name)\n", - " print('')\n", + " performance_dict[model_name_list[i].__name__] = prec_val_list\n", "\n", - " performance_dict[model_name_list[i]] = prec_val_list\n", - " \n", - "df_performance = np.round(pd.DataFrame.from_dict(performance_dict, orient='index', columns=col_name_list),3)" + "df_performance = np.round(pd.DataFrame.from_dict(performance_dict, orient='index', columns=col_name_list),3)\n" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
precision_trainrecall_trainf1_trainprecision_testrecall_testf1_test
DecisionTreeClassifier1.01.01.00.9520.8960.923
RandomForestClassifier1.01.01.00.9850.9550.970
AdaBoostClassifier1.01.01.00.9560.9700.963
GradientBoostingClassifier1.01.01.00.9700.9700.970
\n", - "
" - ], - "text/plain": [ - " precision_train recall_train f1_train \\\n", - "DecisionTreeClassifier 1.0 1.0 1.0 \n", - "RandomForestClassifier 1.0 1.0 1.0 \n", - "AdaBoostClassifier 1.0 1.0 1.0 \n", - "GradientBoostingClassifier 1.0 1.0 1.0 \n", - "\n", - " precision_test recall_test f1_test \n", - "DecisionTreeClassifier 0.952 0.896 0.923 \n", - "RandomForestClassifier 0.985 0.955 0.970 \n", - "AdaBoostClassifier 0.956 0.970 0.963 \n", - "GradientBoostingClassifier 0.970 0.970 0.970 " - ] + "text/plain": " precision_train recall_train f1_train \\\nDecisionTreeClassifier 1.0 1.0 1.0 \nRandomForestClassifier 1.0 1.0 1.0 \nAdaBoostClassifier 1.0 1.0 1.0 \nGradientBoostingClassifier 1.0 1.0 1.0 \n\n precision_test recall_test f1_test \nDecisionTreeClassifier 0.952 0.896 0.923 \nRandomForestClassifier 0.985 0.955 0.970 \nAdaBoostClassifier 0.956 0.970 0.963 \nGradientBoostingClassifier 0.970 0.970 0.970 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
precision_trainrecall_trainf1_trainprecision_testrecall_testf1_test
DecisionTreeClassifier1.01.01.00.9520.8960.923
RandomForestClassifier1.01.01.00.9850.9550.970
AdaBoostClassifier1.01.01.00.9560.9700.963
GradientBoostingClassifier1.01.01.00.9700.9700.970
\n
" }, "execution_count": 13, "metadata": {}, From bad8be6707e2455bf7a2efaef40605d6d55a0080 Mon Sep 17 00:00:00 2001 From: Tristan Eastburn Date: Fri, 22 Dec 2023 16:12:15 -0800 Subject: [PATCH 2/2] Update decision_tree_ensembles/tree_ensemble_example.ipynb --- decision_tree_ensembles/tree_ensemble_example.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decision_tree_ensembles/tree_ensemble_example.ipynb b/decision_tree_ensembles/tree_ensemble_example.ipynb index be63b1d..e402448 100644 --- a/decision_tree_ensembles/tree_ensemble_example.ipynb +++ b/decision_tree_ensembles/tree_ensemble_example.ipynb @@ -281,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 13, "metadata": {}, "outputs": [ {