{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"PS-Ensemble-methods.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyMgLBThUr14Vem4/BKwpv5s"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["## Batch Ensemble Methods"],"metadata":{"id":"KWiFYgxrnwLn"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"YDcCrLh8Hqwd"},"outputs":[],"source":["import numpy as np\n","import sklearn\n","from sklearn.datasets import load_breast_cancer\n","from sklearn.model_selection import cross_val_score\n","from sklearn.model_selection import RepeatedStratifiedKFold\n","from sklearn.model_selection import train_test_split\n","from sklearn.ensemble import BaggingClassifier\n","import matplotlib.pyplot as plt\n"]},{"cell_type":"code","source":["X,y = load_breast_cancer(return_X_y=True)\n","X.shape, y.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NnVvo5_kJKcX","executionInfo":{"status":"ok","timestamp":1653217001215,"user_tz":-60,"elapsed":16,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"8906b25d-117a-4b28-8e27-09f44f1304cb"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((569, 30), (569,))"]},"metadata":{},"execution_count":2}]},{"cell_type":"code","source":["# Fixed random_state so the stratified hold-out split (and every score computed on it) is the same on each run\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, stratify=y, random_state=1)\n","X_train.shape, X_test.shape"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JmsUEN-fLbe9","executionInfo":{"status":"ok","timestamp":1653217001216,"user_tz":-60,"elapsed":12,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"3f32ea24-efcf-43f1-e58d-ac89248b4554"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((381, 30), (188, 30))"]},"metadata":{},"execution_count":3}]},{"cell_type":"code","source":["model = 
BaggingClassifier()"],"metadata":{"id":"a85fmd4fJSP4"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)\n","n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')\n","f\"Accuracy: {np.mean(n_scores)}, ({np.std(n_scores)})\"\n","## Bagging ensemble with default hyperparameters achieves a classification accuracy of about 95%"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":58},"id":"DMSHRl-CKUPB","executionInfo":{"status":"ok","timestamp":1653217003124,"user_tz":-60,"elapsed":1913,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"5e13346b-6b7f-4721-a6a5-38a451438814"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'Accuracy: 0.9537593984962405, (0.026208482535985338)'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":5}]},{"cell_type":"code","source":["# List every name accepted by the `scoring=` argument.\n","# sklearn.metrics.SCORERS was removed in scikit-learn 1.3; get_scorer_names() is the supported replacement.\n","sorted(sklearn.metrics.get_scorer_names())"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EZDcIr1xKZj6","executionInfo":{"status":"ok","timestamp":1653217003125,"user_tz":-60,"elapsed":12,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"62d0f173-308b-4fa1-835b-57610846faf6"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['accuracy',\n"," 'adjusted_mutual_info_score',\n"," 'adjusted_rand_score',\n"," 'average_precision',\n"," 'balanced_accuracy',\n"," 'completeness_score',\n"," 'explained_variance',\n"," 'f1',\n"," 'f1_macro',\n"," 'f1_micro',\n"," 'f1_samples',\n"," 'f1_weighted',\n"," 'fowlkes_mallows_score',\n"," 'homogeneity_score',\n"," 'jaccard',\n"," 'jaccard_macro',\n"," 'jaccard_micro',\n"," 'jaccard_samples',\n"," 'jaccard_weighted',\n"," 'max_error',\n"," 'mutual_info_score',\n"," 'neg_brier_score',\n"," 'neg_log_loss',\n"," 
'neg_mean_absolute_error',\n"," 'neg_mean_absolute_percentage_error',\n"," 'neg_mean_gamma_deviance',\n"," 'neg_mean_poisson_deviance',\n"," 'neg_mean_squared_error',\n"," 'neg_mean_squared_log_error',\n"," 'neg_median_absolute_error',\n"," 'neg_root_mean_squared_error',\n"," 'normalized_mutual_info_score',\n"," 'precision',\n"," 'precision_macro',\n"," 'precision_micro',\n"," 'precision_samples',\n"," 'precision_weighted',\n"," 'r2',\n"," 'rand_score',\n"," 'recall',\n"," 'recall_macro',\n"," 'recall_micro',\n"," 'recall_samples',\n"," 'recall_weighted',\n"," 'roc_auc',\n"," 'roc_auc_ovo',\n"," 'roc_auc_ovo_weighted',\n"," 'roc_auc_ovr',\n"," 'roc_auc_ovr_weighted',\n"," 'top_k_accuracy',\n"," 'v_measure_score']"]},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["# Same seeded splitter as the accuracy run, so both metrics are measured on identical folds\n","cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)\n","n_scores = cross_val_score(model, X, y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')\n","f\"F1 score: {np.mean(n_scores)}, ({np.std(n_scores)})\"\n","## Bagging ensemble with default hyperparameters achieves a classification F1 score of about 0.96. 
Almost perfect score."],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":58},"id":"p1RMiei_mhoJ","executionInfo":{"status":"ok","timestamp":1653217004258,"user_tz":-60,"elapsed":1139,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"4f197956-9680-4455-bc64-977f1188d116"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'F1 score: 0.9615822205587333, (0.018296811969348698)'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["## Fit on the training split, then score the held-out samples one at a time.\n","model = BaggingClassifier()\n","model.fit(X_train, y_train)\n","# One entry per test sample: |prediction - label|, which is 0 for a hit and 1 for a miss with the 0/1 labels used here\n","err = [abs(model.predict(xx.reshape(1, -1)) - yy) for xx, yy in zip(X_test, y_test)]\n","\n","print(f\"Accuracy: {1-np.sum(np.array(err))/len(err)}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"GgIIgIzhmpm5","executionInfo":{"status":"ok","timestamp":1653217004709,"user_tz":-60,"elapsed":456,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"a8b0e331-9691-4b2e-ae2a-74f1ef526aec"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Accuracy: 0.9680851063829787\n"]}]},{"cell_type":"code","source":["## predict() also accepts the whole test matrix, so the per-sample loop is not needed\n","y_hat = model.predict(X_test)\n","print(y_hat.shape)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_nR0kon9pXjw","executionInfo":{"status":"ok","timestamp":1653217004710,"user_tz":-60,"elapsed":11,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"fed795b4-b526-406c-cd5e-6c57d11e9fb4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["(188,)\n"]}]},{"cell_type":"code","source":["## What if we are not happy with default hyperparameters?\n","\n","# get a list of models to 
evaluate. Each model will be defined by different hyperparameters\n","def create_models(n_trees=(10, 50, 100, 500, 1000, 5000)):\n","\t\"\"\"Build one unfitted BaggingClassifier per ensemble size.\n","\n","\tArgs:\n","\t\tn_trees: iterable of n_estimators values to compare.\n","\n","\tReturns:\n","\t\tdict mapping str(n) to BaggingClassifier(n_estimators=n), in the order given.\n","\t\"\"\"\n","\treturn {str(n): BaggingClassifier(n_estimators=n) for n in n_trees}\n"," \n","# evaluate a given model using cross-validation\n","def evaluate_model(model, X, y, random_state=None):\n","\t\"\"\"Cross-validate model on (X, y) and return the per-fold F1 scores.\n","\n","\tArgs:\n","\t\tmodel: estimator handed to cross_val_score.\n","\t\tX, y: features and labels to split.\n","\t\trandom_state: seed for the fold shuffling; None (default) draws different folds on every call.\n","\n","\tReturns:\n","\t\tthe scores returned by cross_val_score, one per fold (10 splits x 3 repeats).\n","\t\"\"\"\n","\t# repeated stratified k-fold: 10 splits, repeated 3 times\n","\tcv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=random_state)\n","\t# n_jobs=-1 lets scikit-learn evaluate the folds in parallel\n","\treturn cross_val_score(model, X, y, scoring='f1', cv=cv, n_jobs=-1)\n"," "],"metadata":{"id":"grZoqEfbqBvD"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["## Running this cell will take a lot of time!\n","models = create_models()\n","# evaluate the models and store results\n","results = []\n","names = []\n","for name, model in models.items():\n","\t# evaluate the model\n","\tscores = evaluate_model(model, X, y)\n","\t# store the results\n","\tresults.append(scores)\n","\tnames.append(name)\n","\t# summarize the performance along the way\n","\tprint('>%s %.3f (%.3f)' % (name, np.mean(scores), np.std(scores)))\n","# plot model performance for comparison\n","plt.boxplot(results, labels=names, showmeans=True)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"suCDzITFq6X1","executionInfo":{"status":"ok","timestamp":1653217733884,"user_tz":-60,"elapsed":729178,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"b0bd888b-0331-4001-c990-7b9ae276d753"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":[">10 0.956 (0.028)\n",">50 0.970 (0.022)\n",">100 0.969 (0.022)\n",">500 0.969 (0.015)\n",">1000 0.971 (0.017)\n",">5000 0.972 (0.023)\n"]},{"output_type":"execute_result","data":{"text/plain":["{'boxes': [,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ],\n"," 'caps': [,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ,\n"," 
,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ],\n"," 'fliers': [,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ],\n"," 'means': [,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ],\n"," 'medians': [,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ],\n"," 'whiskers': [,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ,\n"," ]}"]},"metadata":{},"execution_count":11},{"output_type":"display_data","data":{"text/plain":["
"],"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAURklEQVR4nO3db4xd9Z3f8ffHxiRVIYuNXUQxBVZlJRtEV8msl22TNUTChn0AAqRdvFUDqVVEKHlGJCxHJWtkWd2lUhWVMEtl1NIHRizaAFWzsV1slCJtKoaAnbCuwUG7xSaNh/JnG7EJg+fXB3PGugzz587MnfvnN++XdORzz597fl+fuZ977u+ce25KKUiS6rWi1w2QJC0tg16SKmfQS1LlDHpJqpxBL0mVO6fXDZhq7dq15fLLL+91MyRpoLz88svvlFLWTTev74L+8ssvZ2RkpNfNkKSBkuRvZppn140kVc6gl6TKGfSSVDmDXpIqZ9BLUuXmDPokjyc5neQnM8xPkm8nOZHkaJLPt8y7M8kbzXBnJxsu1Wzr1q2sWLGCJKxYsYKtW7f2ukkaYO0c0f8n4MZZ5t8EXNkMdwOPAiRZAzwI/DawCXgwyerFNFZaDrZu3cqBAwe45557eP/997nnnns4cOCAYa8Fm/M6+lLKD5JcPssitwBPlIn7Hf8wyQVJLgauAw6WUt4FSHKQiTeMfYtttFSzgwcP8rWvfY3vfOc7AGf/HR4e7mWzNMA60Ud/CfBWy+OTzbSZpn9KkruTjCQZGR0d7UCTPvX8Cx4GgfUNdn1TlVLYs2fPJ6bt2bOHQf3tiJr336DU1hcnY0spj5VShkopQ+vWTfsN3sU+/4xDO/P7nfUNdn1TJWHHjh2fmLZjx46BCL7p1Lz/BqW2TgT9KeDSlsfrm2kzTZc0ixtuuIFHH32Ue++9lw8++IB7772XRx99lBtuuKHXTdOA6kTQPwd8pbn65lrgg1LKz4D9wJYkq5uTsFuaaZJmsX//frZs2cLw8DAXXHABw8PDbNmyhf37ffloYeY8GZtkHxMnVtcmOcnElTSrAEopw8D3gN8DTgAfAl9t5r2b5CHgpeapdk2emJU0O0NdndTOVTfb5phfgH89w7zHgccX1jRJUif0xclYSdLSMeglqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5c3rdAGm5S7LgdUspHWyJprNmzRree++9Ba27kH27evVq3n333QVtbyYG/YCo4Y9tNrXXN5vZwjrJQIR5zfvvvffe6+o+WMwb/0wM+gFRwx/bbGqvr3buv/5mH70kVc6gl6TKGfSSVDmDXpIqZ9BLUuWqCfo1a9aQZN4DsKD11qxZ0+OKJak9bQV9khuTHE9yIskD08y/LMnzSY4meSHJ+pZ5f5zktSTHknw7S3Rd1OTlXd0aFnrNsCR125xBn2Ql8AhwE7AR2JZk45TFHgaeKKVcA+wC9jTr/lPgnwHXAFcDvwVs7ljrJUlzaueIfhNwopTyZinlI+BJ4JYpy2wEDjXjh1vmF+CzwLnAZ4BVwM8X22hJUvvaCfpLgLdaHp9sprU6AtzWjN8KnJ/kwlLKXzIR/D9rhv2llGNTN5Dk7iQjSUZGR0fnW4MkaRadOhl7P7A5yStMdM2cAs4k+cfABmA9E28OX07ypakrl1IeK6UMlVKG1q1b16EmSZKgvXvdnAIubXm8vpl2VinlbZoj+iTnAbeXUt5P8q+AH5ZSftHM+wvgd4D/0YG2S5La0M4R/UvAlUmuSHIucAfwXOsCSdYmmXyuHcDjzfj/ZuJI/5wkq5g42v9U140kaenMGfSllI+B+4D9TIT0U6WU15LsSnJzs9h1wPEkrwMXAbub6U8DPwV+zEQ//pFS
yn/tbAmSpNmk3+51PTQ0VEZGRua9Xrfv2+323F432M7eb29QakvycillaLp51XwzVpI0PYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6QOG/1wlLu+fxfv/N07vW4KYNBLXeEvoH1av4VhJw0fHeZHP/8Rw0eGe90UwG/GLljXv7H4rV/r3rbObvODLm6r7vr8+/y0hy5czZ+dfx6///9+wTf/7wJ+sa1b+2+etY2uXMFN6/8hv1qxgs+Mj/P9k2+z9sz4PLc5/9pm+2asQb9Abs/tzYtvZJ8w+uEoN/35TfzqzK/4zMrP8P3bv8/av7d2yba3GPPd1kM/fIjvvvFdxsbHWLViFbddeRvfvPabS7a9lvVmDPp2blOsATX64Sjf+ME3eHjzw/N6Eanz8kd/2/03sm91bXPzNnx0mPEycZQ7XsYZPjI8rzDsV6MfjvLsiWcZGx8DYGx8jGdOPMM9/+Senr4G7aOvWL/1E2p+au3DnikMa6iz9Q1s0uQbWS8Z9JWafDEVSjUvouWm1jfqfg3DTjhy+sjZN7BJY+NjvHr61R61aIJdN5Wq9aNxq5q7pqa+Uff6o38n9WsYdsLTNz/d6yZMy6CvUL/2E3Za6xFvbW9iNb9R92sY1syumwrV/NF4Us1dUzX3Yas3DPoK1fzReNJ0R7y1WA5v1Oouu24qVPtH49q7ppbDG7W6y6DXwJntiLeGfuza36jVfXbdaOB4xCvNj0f0Gjge8Urz4xG9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXJtBX2SG5McT3IiyQPTzL8syfNJjiZ5Icn6lnn/KMmBJMeS/FWSyzvXfEnSXOYM+iQrgUeAm4CNwLYkG6cs9jDwRCnlGmAXsKdl3hPAn5RSNgCbgNOdaLgkqT3tHNFvAk6UUt4spXwEPAncMmWZjcChZvzw5PzmDeGcUspBgFLKL0opH3ak5ZKktrQT9JcAb7U8PtlMa3UEuK0ZvxU4P8mFwG8A7yf58ySvJPmT5hPCJyS5O8lIkpHR0dH5VyFJmlGnTsbeD2xO8gqwGTgFnGHipmlfaub/FvDrwF1TVy6lPFZKGSqlDK1bt65DTZIkQXt3rzwFXNryeH0z7axSyts0R/RJzgNuL6W8n+Qk8Gop5c1m3jPAtcDeDrT9E8qDn4Nv/Vqnn3b27UnSAGgn6F8CrkxyBRMBfwfwh60LJFkLvFtKGQd2AI+3rHtBknWllFHgy8BIpxr/iTb80d9SSlmKp55+ewnlW13bnCQt2JxdN6WUj4H7gP3AMeCpUsprSXYlublZ7DrgeJLXgYuA3c26Z5jotnk+yY+BAP+x41VI6rkkXRtWr17d63IHSls/PFJK+R7wvSnT/k3L+NPAtL8G0Vxxc80i2rhkRj8c5Rs/+AYPb354IH5rNEnXttWLF5L1dU6361vop+kkXf0kvlCDvu+W9S9MDR8d5kc//9FA/NZo7S8k65veoNRXsxr23bK9BcLoh6M8e+JZCoVnTjzDO3/3Tq+bJElLYtkG/fDRYcbLOADjZZzhI8M9bpEkLY1lGfSTR/Nj42MAjI2PeVQvqVrLMuhbj+YneVQvqVbLMuiPnD5y9mh+0tj4GK+efrVHLZKkpbMsr7p5+uZprwSVpCotyyN6SVpODHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5toI+yY1Jjic5keSBaeZfluT5JEeTvJBk/ZT5n0tyMsl/6FTDZ2hn14bVq1cvZSmS1DHnzLVAkpXAI8ANwEngpSTPlVL+qmWxh4EnSin/OcmXgT3Av2iZ/xDwg841+9NKKQtaL8mC15U6IcmC5/u3q3a0c0S/CThRSnmzlPIR8CRwy5RlNgKHmvHDrfOTfAG4CDiw
+OZK9SmlLHiQ2tFO0F8CvNXy+GQzrdUR4LZm/Fbg/CQXJlkB/Dvg/tk2kOTuJCNJRkZHR9truSSpLZ06GXs/sDnJK8Bm4BRwBrgX+F4p5eRsK5dSHiulDJVShtatW9ehJkmSoI0+eiZC+9KWx+ubaWeVUt6mOaJPch5weynl/SS/A3wpyb3AecC5SX5RSvnUCV1J0tJoJ+hfAq5McgUTAX8H8IetCyRZC7xbShkHdgCPA5RS/nnLMncBQ4a8JHXXnF03pZSPgfuA/cAx4KlSymtJdiW5uVnsOuB4kteZOPG6e4naK0map/TbmfuhoaEyMjLSte3Vfnml9amf1bz/ul1bkpdLKUPTzfObsZJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuXauXulJGkag/IzkAa9JC3QoNyQza4bSaqcR/Tqe4Py8VjqVwa9+p5hLS2OXTeSVDmDXpIqZ9eNpCXlOZbeM+glLSnDuvfsupGkyhn0klQ5g16SKmcffQU82SVpNgZ9BQxrSbOx60aSKmfQS1LlDHpJqpxBL0mVM+glqXJtBX2SG5McT3IiyQPTzL8syfNJjiZ5Icn6ZvpvJvnLJK818/6g0wVIkmY3Z9AnWQk8AtwEbAS2Jdk4ZbGHgSdKKdcAu4A9zfQPga+UUq4CbgT+fZILOtV4SdLc2jmi3wScKKW8WUr5CHgSuGXKMhuBQ8344cn5pZTXSylvNONvA6eBdZ1ouCSpPe0E/SXAWy2PTzbTWh0BbmvGbwXOT3Jh6wJJNgHnAj+duoEkdycZSTIyOjrabtslSW3o1MnY+4HNSV4BNgOngDOTM5NcDPwX4KullPGpK5dSHiulDJVShtat84BfkjqpnVsgnAIubXm8vpl2VtMtcxtAkvOA20sp7zePPwf8N2BnKeWHnWi0JKl97RzRvwRcmeSKJOcCdwDPtS6QZG2SyefaATzeTD8X+C4TJ2qf7lyzJUntmjPoSykfA/cB+4FjwFOllNeS7Epyc7PYdcDxJK8DFwG7m+m/D/wucFeSV5vhNztdhCRpZum3Ox8ODQ2VkZGRrm0viXd/lDTwkrxcShmabp7fjJWkyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6DaR9+/Zx9dVXs3LlSq6++mr27dvX6yZJfaudHx6R+sq+ffvYuXMne/fu5Ytf/CIvvvgi27dvB2Dbtm09bp3Ufzyi18DZvXs3e/fu5frrr2fVqlVcf/317N27l927d8+9srQMeT9670c/cFauXMkvf/lLVq1adXba2NgYn/3sZzlz5swsa0r18n70qsqGDRt48cUXPzHtxRdfZMOGDT1qkdTfDHoNnJ07d7J9+3YOHz7M2NgYhw8fZvv27ezcubPXTZP6kidjNXAmT7h+/etf59ixY2zYsIHdu3d7IlaagX309tFLqoB99JK0jBn0klQ5g75SfnNU6o1+fO15MrZCfnNU6o2+fe2VUvpq+MIXvlC6aeK/oC5XXXVVOXTo0CemHTp0qFx11VU9apG0PPTytQeMlBly1atuKrzqxm+OSr3Ry9eeV90sM35zVOqNfn3tGfQV8pujUm/062vPk7EV8pujUm/062vPPvoK++glLT+L7qNPcmOS40lOJHlgmvmXJXk+ydEkLyRZ3zLvziRvNMOdCy9DkrQQcwZ9kpXAI8BNwEZgW5KNUxZ7GHiilHINsAvY06y7BngQ+G1gE/BgktWda74kaS7tHNFvAk6UUt4spXwEPAncMmWZjcChZvxwy/ytwMFSyrullPeAg8CNi2+2JKld7QT9JcBbLY9PNtNaHQFua8ZvBc5PcmGb65Lk7iQjSUZGR0fbbbskqQ2durzyfmBzkleAzcApoO1vB5RSHiulDJVShtatW9ehJkmSoL3LK08Bl7Y8Xt9MO6uU8jbNEX2S84DbSynvJzkF
XDdl3RcW0V5J0jy1c0T/EnBlkiuSnAvcATzXukCStUkmn2sH8Hgzvh/YkmR1cxJ2SzOtq5LMOLQzX5IG2ZxBX0r5GLiPiYA+BjxVSnktya4kNzeLXQccT/I6cBGwu1n3XeAhJt4sXgJ2NdO6aqYb/bQzSNKgW/ZfmJKkGnhTM0laxgx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVLm+u44+ySjwN13c5FrgnS5ur9usb7BZ3+Dqdm2XlVKmvVlY3wV9tyUZmelLBjWwvsFmfYOrn2qz60aSKmfQS1LlDHp4rNcNWGLWN9isb3D1TW3Lvo9ekmrnEb0kVc6gl6TKLaugT/J4ktNJftIybU2Sg0neaP5d3cs2LlaSv07y4ySvJhlppg1sjfPZZ5nw7SQnkhxN8vnetbw989lfg1Bfp/ZXkjub5d9IcmcvaplJp/ZZV2tczK8vDdoA/C7weeAnLdP+GHigGX8A+Le9bucia/xrYO2UaQNb43z2GfB7wF8AAa4F/mev29/J/TUI9XVifwFrgDebf1c346t7XVsn91m3a+z5f1oPdtLlU/4IjwMXN+MXA8d73cZF1jfdH+FA19juPgP+FNg23XL9Osxnfw1KfYvdX8A24E9bpn9iuV4Pndhn3a5xWXXdzOCiUsrPmvH/w8Rv3g6yAhxI8nKSu5tptdU4Uz2XAG+1LHeymdbP5rO/BrE+mH89/V5nJ/ZZV2s8Z6meeBCVUkqSQb/e9IullFNJ/gFwMMn/ap1ZSY1nVVCP+2vwDNw+84gefp7kYoDm39M9bs+ilFJONf+eBr4LbKKyGpm5nlPApS3LrW+m9a157q+Bq68x33r6us4O7bOu1mjQw3PA5BnvO4Fne9iWRUny95OcPzkObAF+QkU1Nmaq5zngK82VDtcCH7R8nO47C9hfA1Vfi/nWsx/YkmR1c/XKlmZaz3Vwn3W3xl6f2OjySZR9wM+AMSb6xLYDFwLPA28A/x1Y0+t2LqK+XweONMNrwM5m+sDWOJ99xsSVDY8APwV+DAz1uv2d3F+DUF+n9hfwL4ETzfDVXte1FPusmzV6CwRJqpxdN5JUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVe7/A9zIBUqmYilpAAAAAElFTkSuQmCC\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":["## Ensemble methods for streams with river"],"metadata":{"id":"DjU2cj5DoEWH"}},{"cell_type":"code","source":["!pip install river"],"metadata":{"id":"74zhsFMpbHwT"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from river import ensemble\n","from river import evaluate\n","from river import metrics\n","from river import optim\n","from river import preprocessing\n","from river import stream\n","from river import datasets\n","from river import tree\n","from river import linear_model\n","from river import neighbors"],"metadata":{"id":"R9027HL2q8Wi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["## Classification task\n","dataset = 
datasets.Phishing()\n"],"metadata":{"id":"Z0F5DStPYeE7"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Bagging over a scaled logistic regression; the seed fixes the resampling so the score is reproducible\n","model = ensemble.BaggingClassifier(\n","    model=preprocessing.StandardScaler() | linear_model.LogisticRegression(),\n","    n_models=3,\n","    seed=1,\n",")\n","metric = metrics.F1()"],"metadata":{"id":"kCO31LQWYjUu"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9BJ-bAl1ae4S","executionInfo":{"status":"ok","timestamp":1653218281335,"user_tz":-60,"elapsed":874,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"0f8e057d-a06c-4f05-a165-c3ca2b4bba83"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["F1: 87.96%"]},"metadata":{},"execution_count":41}]},{"cell_type":"code","source":["# Alternative base learner to try: tree.SGTClassifier()\n","base_model = tree.HoeffdingTreeClassifier(grace_period=50, split_confidence=0.01)\n","model = ensemble.BaggingClassifier(model=base_model, n_models=10, seed=1)\n","metric = metrics.F1()"],"metadata":{"id":"8n1D0xJwahxW"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"DEdE25FUcx0d","executionInfo":{"status":"ok","timestamp":1653218736250,"user_tz":-60,"elapsed":2396,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"a268d4a6-e951-4e06-b73e-f03455b5230b"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["F1: 88.99%"]},"metadata":{},"execution_count":57}]},{"cell_type":"code","source":["## With ADWIN drift detection\n","\n","# Alternative base learner to try: tree.SGTClassifier()\n","base_model = tree.HoeffdingTreeClassifier(grace_period=50, split_confidence=0.01)\n","model = ensemble.ADWINBaggingClassifier(model=base_model, n_models=10, seed=1)\n","metric = 
metrics.F1()"],"metadata":{"id":"ay2cl934fn9K"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zgdo_3rcfnrV","executionInfo":{"status":"ok","timestamp":1653218466328,"user_tz":-60,"elapsed":2967,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"d008bf95-ac6f-4eb6-8a3f-3f76940f9c3d"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["F1: 88.50%"]},"metadata":{},"execution_count":47}]},{"cell_type":"code","source":["## Adaptive Random Forests\n","# seed makes the forest's random choices repeatable across runs\n","model = ensemble.AdaptiveRandomForestClassifier(n_models=3, seed=1)\n","metric = metrics.F1()"],"metadata":{"id":"-XnRCvc-gYBb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E3FldehpggPd","executionInfo":{"status":"ok","timestamp":1653218641371,"user_tz":-60,"elapsed":818,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"bd5aaf95-c503-4530-ab56-77137528aba0"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["F1: 89.37%"]},"metadata":{},"execution_count":53}]},{"cell_type":"code","source":["## Regression task\n","\n","# From here on `dataset` is the regression stream; it replaces the Phishing classification stream\n","dataset = datasets.TrumpApproval()"],"metadata":{"id":"N5fZl3dFdWd0"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Scale the features, then bag three linear regressors\n","model = preprocessing.StandardScaler() | ensemble.BaggingRegressor(\n","    model=linear_model.LinearRegression(intercept_lr=0.1),\n","    n_models=3,\n","    seed=1,\n",")\n","\n","metric = metrics.MAE()"],"metadata":{"id":"W0PFUo_og1i8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, 
metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3p5yaCZsg1be","executionInfo":{"status":"ok","timestamp":1653218853717,"user_tz":-60,"elapsed":4,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"481c5e75-0ca9-4183-f747-f3f352e43459"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["MAE: 0.726529"]},"metadata":{},"execution_count":64}]},{"cell_type":"code","source":["# Alternative base learner to try: tree.HoeffdingTreeRegressor(grace_period=5)\n","base_model = tree.SGTRegressor(grace_period=1)\n","\n","model = ensemble.BaggingRegressor(model=base_model, n_models=10, seed=1)\n","\n","metric = metrics.MAE()"],"metadata":{"id":"esuqZtUxg1Oa"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JVhzxEl2iGOQ","executionInfo":{"status":"ok","timestamp":1653219752866,"user_tz":-60,"elapsed":100250,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"d660649b-b3e1-43e4-89f8-41d8d27186d0"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["MAE: 1.228878"]},"metadata":{},"execution_count":92}]},{"cell_type":"code","source":["# Alternative to try: scale first, i.e. preprocessing.StandardScaler() | neighbors.KNNRegressor(window_size=10)\n","base_model = neighbors.KNNRegressor(window_size=10)\n","\n","model = ensemble.BaggingRegressor(model=base_model, n_models=3, seed=1)\n","\n","metric = metrics.MAE()"],"metadata":{"id":"fdPqSIHziMZE"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JOahm0FljQov","executionInfo":{"status":"ok","timestamp":1653220102682,"user_tz":-60,"elapsed":405,"user":{"displayName":"Claudia 
Soares","userId":"01695063174396797696"}},"outputId":"19372c2b-7102-4992-a103-15ff24294c19"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["MAE: 0.352555"]},"metadata":{},"execution_count":115}]},{"cell_type":"code","source":["## Adaptive Random Forest Regression\n","\n","# Scale the features, then feed them to a seeded 100-tree adaptive random forest\n","model = preprocessing.StandardScaler() | ensemble.AdaptiveRandomForestRegressor(n_models=100, seed=1)\n","metric = metrics.MAE()"],"metadata":{"id":"rlaiGTibllsw"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["evaluate.progressive_val_score(dataset, model, metric)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"FTupo-lTmmsx","executionInfo":{"status":"ok","timestamp":1653220388328,"user_tz":-60,"elapsed":52021,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"8f13e34c-ba23-4feb-a43a-ce569ff56f9a"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["MAE: 0.808674"]},"metadata":{},"execution_count":127}]}]}