{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"PS-Regression.ipynb","provenance":[],"authorship_tag":"ABX9TyMUxMMGOEfggrVYjL6jkgP2"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"z-0ODmpU5kdH"},"outputs":[],"source":["### The bike sharing forecasting problem (river tutorial)"]},{"cell_type":"code","source":["!pip install river"],"metadata":{"id":"6zYUgVlK5usm"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from river import datasets\n","from pprint import pprint\n","from river import compose\n","from river import linear_model\n","from river import metrics\n","from river import evaluate\n","from river import preprocessing\n","from river import optim\n","from river import feature_extraction\n","from river import stats\n","from river import ensemble\n","from river import neighbors\n","import datetime as dt"],"metadata":{"id":"t2JoFJ0N53jt"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["## Look at the first entry of the dataset\n","\n","X_y = datasets.Bikes() # Connect to the stream\n","\n","for x, y in X_y:\n"," pprint(x)\n"," print(f'Number of available bikes: {y}') # <- We want to predict the number of available bikes\n"," print(f'Number of features: {len(x)}')\n"," break # exit after the first call "],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PqiDft-N57bi","executionInfo":{"status":"ok","timestamp":1653242232262,"user_tz":-60,"elapsed":406,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"2cd1ab8c-6f17-44eb-e2ec-30c56c59af84"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["{'clouds': 75,\n"," 'description': 'light rain',\n"," 'humidity': 81,\n"," 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),\n"," 'pressure': 1017.0,\n"," 'station': 'metro-canal-du-midi',\n"," 'temperature': 6.54,\n"," 'wind': 9.3}\n","Number of available bikes: 1\n","Number of features: 8\n"]}]},{"cell_type":"code","source":["model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind') # Select only numeric features.\n","model |= preprocessing.StandardScaler()\n","model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001)) # <-- Optimizer is Stochastic Gradient Descent. LR=1e-3\n","\n","metric = metrics.MAE()"],"metadata":{"id":"qlsJ3q5w6LZY"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":260},"id":"G2a7Gzpt7ye9","executionInfo":{"status":"ok","timestamp":1653242542484,"user_tz":-60,"elapsed":372,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"eb99e80a-d10e-475c-f47d-a68a092db979"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Pipeline (\n"," Select (\n"," clouds\n"," humidity\n"," pressure\n"," temperature\n"," wind\n"," ),\n"," StandardScaler (\n"," with_std=True\n"," ),\n"," LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.001\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," )\n",")"],"text/html":["
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
StandardScaler
\n","{'counts': Counter({'pressure': 182470,\n"," 'clouds': 182470,\n"," 'temperature': 182470,\n"," 'wind': 182470,\n"," 'humidity': 182470}),\n"," 'means': defaultdict(<class 'float'>,\n"," {'clouds': 30.315131254453505,\n"," 'humidity': 62.24244533347998,\n"," 'pressure': 1017.0563060996391,\n"," 'temperature': 20.50980692716619,\n"," 'wind': 3.4184331122924543}),\n"," 'vars': defaultdict(<class 'float'>,\n"," {'clouds': 1389.0025610928221,\n"," 'humidity': 349.59967918503554,\n"," 'pressure': 33.298307526514115,\n"," 'temperature': 34.70701720774977,\n"," 'wind': 4.473627075744674}),\n"," 'with_std': True}\n","\n","
LinearRegression
\n","{'_weights': {'pressure': 7.775241118066252, 'clouds': -0.9309940243402086, 'temperature': 1.3677842059050738, 'wind': -0.1680275080383196, 'humidity': 7.363588933751584},\n"," '_y_name': None,\n"," 'clip_gradient': 1000000000000.0,\n"," 'initializer': Zeros (),\n"," 'intercept': 5.101412976082054,\n"," 'intercept_init': 0.0,\n"," 'intercept_lr': Constant({'learning_rate': 0.01}),\n"," 'l2': 0.0,\n"," 'loss': Squared({}),\n"," 'optimizer': SGD({'lr': Constant({'learning_rate': 0.001}), 'n_iterations': 182470})}\n","\n","
"]},"metadata":{},"execution_count":13}]},{"cell_type":"code","source":["evaluate.progressive_val_score(X_y, model, metric, print_every=20_000) # We want to report every 20k data points"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"XMglZiul7Ja6","executionInfo":{"status":"ok","timestamp":1653242474375,"user_tz":-60,"elapsed":31386,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"1fd07580-af51-4fce-8398-005235c1a73e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[20,000] MAE: 4.912727\n","[40,000] MAE: 5.333554\n","[60,000] MAE: 5.330948\n","[80,000] MAE: 5.392313\n","[100,000] MAE: 5.423059\n","[120,000] MAE: 5.541223\n","[140,000] MAE: 5.613023\n","[160,000] MAE: 5.622428\n","[180,000] MAE: 5.567824\n"]},{"output_type":"execute_result","data":{"text/plain":["MAE: 5.563893"]},"metadata":{},"execution_count":12}]},{"cell_type":"code","source":["# This was a large mean absolute error...\n","# Let's add more information. What about the hour of the day?\n","def get_hour(x):\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n","model += (\n"," get_hour |\n"," feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean()) # We also aggregate the number of bikes in hours\n",")\n","model |= preprocessing.StandardScaler()\n","model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))\n","\n","metric = metrics.MAE()"],"metadata":{"id":"syCa0WHb7a7-"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":378},"id":"b3Bvcy9g8aC_","executionInfo":{"status":"ok","timestamp":1653242738083,"user_tz":-60,"elapsed":383,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"a222486d-0e34-48c5-f31f-588c1733c64c"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Pipeline (\n"," TransformerUnion (\n"," Select (\n"," clouds\n"," humidity\n"," pressure\n"," temperature\n"," wind\n"," ),\n"," Pipeline (\n"," FuncTransformer (\n"," func=\"get_hour\"\n"," ),\n"," TargetAgg (\n"," by=['station', 'hour']\n"," how=Mean ()\n"," target_name=\"y\"\n"," )\n"," )\n"," ),\n"," StandardScaler (\n"," with_std=True\n"," ),\n"," LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.001\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," )\n",")"],"text/html":["
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour
\n","def get_hour(x):\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour
\n","{'_feature_name': 'y_mean_by_station_and_hour',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
LinearRegression
\n","{'_weights': {},\n"," '_y_name': None,\n"," 'clip_gradient': 1000000000000.0,\n"," 'initializer': Zeros (),\n"," 'intercept': 0.0,\n"," 'intercept_init': 0.0,\n"," 'intercept_lr': Constant({'learning_rate': 0.01}),\n"," 'l2': 0.0,\n"," 'loss': Squared({}),\n"," 'optimizer': SGD({'lr': Constant({'learning_rate': 0.001}), 'n_iterations': 0})}\n","\n","
"]},"metadata":{},"execution_count":17}]},{"cell_type":"code","source":["evaluate.progressive_val_score(X_y, model, metric, print_every=20_000)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZTHAJbm38jDz","executionInfo":{"status":"ok","timestamp":1653242921904,"user_tz":-60,"elapsed":45871,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"d1b91144-b64a-44df-ec0a-1502bb5b2329"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[20,000] MAE: 3.721246\n","[40,000] MAE: 3.829972\n","[60,000] MAE: 3.845068\n","[80,000] MAE: 3.910259\n","[100,000] MAE: 3.888652\n","[120,000] MAE: 3.923727\n","[140,000] MAE: 3.980953\n","[160,000] MAE: 3.950034\n","[180,000] MAE: 3.934545\n"]},{"output_type":"execute_result","data":{"text/plain":["MAE: 3.933498"]},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["# Less error. As expected, the number of available bikes depends on the hour of the day. \n","# What about day of the week? \n","\n","def get_hour_and_weekday(x):\n"," x['weekday'] = (x['moment'].weekday() < 6) # Mon:0,Tue:1,...,Sat:6,Sun:7\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n","model += (\n"," get_hour_and_weekday |\n"," feature_extraction.TargetAgg(by=['station', 'hour', 'weekday'], how=stats.Mean()) # We also aggregate the number of bikes in hours\n",")\n","model |= preprocessing.StandardScaler()\n","model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))\n","\n","metric = metrics.MAE()"],"metadata":{"id":"JABQpxPF9Eu4"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":378},"id":"Pgu8-Eh1_FNp","executionInfo":{"status":"ok","timestamp":1653244836316,"user_tz":-60,"elapsed":7,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"9a842e90-9f0c-4dfa-cf6a-a27e47082dad"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Pipeline (\n"," TransformerUnion (\n"," Select (\n"," clouds\n"," humidity\n"," pressure\n"," temperature\n"," wind\n"," ),\n"," Pipeline (\n"," FuncTransformer (\n"," func=\"get_hour_and_weekday\"\n"," ),\n"," TargetAgg (\n"," by=['station', 'hour', 'weekday']\n"," how=Mean ()\n"," target_name=\"y\"\n"," )\n"," )\n"," ),\n"," StandardScaler (\n"," with_std=True\n"," ),\n"," LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.001\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," )\n",")"],"text/html":["
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour_and_weekday
\n","def get_hour_and_weekday(x):\n"," x['weekday'] = (x['moment'].weekday() < 6) # Mon:0,Tue:1,...,Sat:6,Sun:7\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour_and_weekday
\n","{'_feature_name': 'y_mean_by_station_and_hour_and_weekday',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour', 'weekday'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
LinearRegression
\n","{'_weights': {},\n"," '_y_name': None,\n"," 'clip_gradient': 1000000000000.0,\n"," 'initializer': Zeros (),\n"," 'intercept': 0.0,\n"," 'intercept_init': 0.0,\n"," 'intercept_lr': Constant({'learning_rate': 0.01}),\n"," 'l2': 0.0,\n"," 'loss': Squared({}),\n"," 'optimizer': SGD({'lr': Constant({'learning_rate': 0.001}), 'n_iterations': 0})}\n","\n","
"]},"metadata":{},"execution_count":32}]},{"cell_type":"code","source":["evaluate.progressive_val_score(X_y, model, metric, print_every=20_000)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"GKgfA9nW_Y3f","executionInfo":{"status":"ok","timestamp":1653244883337,"user_tz":-60,"elapsed":47025,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"35e6844f-237e-46de-db17-6f1d7df13e77"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[20,000] MAE: 3.323605\n","[40,000] MAE: 3.545601\n","[60,000] MAE: 3.609249\n","[80,000] MAE: 3.697535\n","[100,000] MAE: 3.703275\n","[120,000] MAE: 3.726729\n","[140,000] MAE: 3.789677\n","[160,000] MAE: 3.753734\n","[180,000] MAE: 3.734026\n"]},{"output_type":"execute_result","data":{"text/plain":["MAE: 3.733385"]},"metadata":{},"execution_count":33}]},{"cell_type":"code","source":["## Experiment with different optimizers: optim.RMSProp(), optim.Adam(), optim.NesterovMomentum(), etc"],"metadata":{"id":"y0XkEYaP_kbb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["## Let's try an ensemble. \n","\n","model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n","model += (\n"," get_hour_and_weekday |\n"," feature_extraction.TargetAgg(by=['station', 'hour', 'weekday'], how=stats.Mean())\n",")\n","\n","model += feature_extraction.TargetAgg(by='station', how=stats.EWMean(0.5))\n","model |= preprocessing.StandardScaler()\n","model |= ensemble.EWARegressor([ # <- This ensemble regressor will combine \n"," # 3 linear regression models trained with different optimizers. \n"," linear_model.LinearRegression(optim.SGD()), # The EWARegressor will run the 3 models in parallel \n"," linear_model.LinearRegression(optim.RMSProp()),# and assign weights to each model based on their individual performance.\n"," linear_model.LinearRegression(optim.Adam())\n","])\n","\n","metric = metrics.MAE()"],"metadata":{"id":"Z8opYP3uBkMR"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":378},"id":"iGN6_JODCdix","executionInfo":{"status":"ok","timestamp":1653244883342,"user_tz":-60,"elapsed":14,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"88a256e3-c48d-4206-d77c-46161a10b1e2"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Pipeline (\n"," TransformerUnion (\n"," Select (\n"," clouds\n"," humidity\n"," pressure\n"," temperature\n"," wind\n"," ),\n"," Pipeline (\n"," FuncTransformer (\n"," func=\"get_hour_and_weekday\"\n"," ),\n"," TargetAgg (\n"," by=['station', 'hour', 'weekday']\n"," how=Mean ()\n"," target_name=\"y\"\n"," )\n"," ),\n"," TargetAgg (\n"," by=['station']\n"," how=EWMean (\n"," alpha=0.5\n"," )\n"," target_name=\"y\"\n"," )\n"," ),\n"," StandardScaler (\n"," with_std=True\n"," ),\n"," [LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.01\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," ), LinearRegression (\n"," optimizer=RMSProp (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," rho=0.9\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," ), LinearRegression (\n"," optimizer=Adam (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," beta_1=0.9\n"," beta_2=0.999\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," )]\n",")"],"text/html":["
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour_and_weekday
\n","def get_hour_and_weekday(x):\n"," x['weekday'] = (x['moment'].weekday() < 6) # Mon:0,Tue:1,...,Sat:6,Sun:7\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour_and_weekday
\n","{'_feature_name': 'y_mean_by_station_and_hour_and_weekday',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour', 'weekday'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
y_ewm_0.5_by_station
\n","{'_feature_name': 'y_ewm_0.5_by_station',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station'],\n"," 'how': EWMean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
EWARegressor
\n","{'data': [LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.01\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=RMSProp (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," rho=0.9\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=Adam (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," beta_1=0.9\n"," beta_2=0.999\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n",")],\n"," 'learning_rate': 0.5,\n"," 'loss': Squared({}),\n"," 'weights': [1.0, 1.0, 1.0]}\n","\n","
"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","source":["## Our evaluation will also improve\n","## We want to evaluate the model by forecasting 30 minutes ahead and only \n","## updating the model once the true values are available. \n","## This can be done using the moment and delay parameters in progressive_val_score.\n","evaluate.progressive_val_score(\n"," dataset=datasets.Bikes(),\n"," model=model,\n"," metric=metric,\n"," moment='moment',\n"," delay=dt.timedelta(minutes=30),\n"," print_every=20_000\n",")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6mFYKZjwCyn4","executionInfo":{"status":"ok","timestamp":1653244961842,"user_tz":-60,"elapsed":78512,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"1f57a799-0472-4b14-be97-386c03d8df2a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[20,000] MAE: 2.249336\n","[40,000] MAE: 2.238924\n","[60,000] MAE: 2.268627\n","[80,000] MAE: 2.283885\n","[100,000] MAE: 2.291669\n","[120,000] MAE: 2.272123\n","[140,000] MAE: 2.257576\n","[160,000] MAE: 2.28196\n","[180,000] MAE: 2.285042\n"]},{"output_type":"execute_result","data":{"text/plain":["MAE: 2.288666"]},"metadata":{},"execution_count":37}]},{"cell_type":"code","source":["model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n","model += (\n"," get_hour_and_weekday |\n"," feature_extraction.TargetAgg(by=['station', 'hour', 'weekday'], how=stats.Mean())\n",")\n","\n","model += feature_extraction.TargetAgg(by='station', how=stats.EWMean(0.5))\n","model |= preprocessing.StandardScaler()\n","model |= ensemble.EWARegressor([ \n"," linear_model.LinearRegression(optim.SGD()), \n"," linear_model.LinearRegression(optim.RMSProp()),\n"," linear_model.LinearRegression(optim.Adam()),\n"," neighbors.KNNRegressor(window_size=50)\n","])\n","\n","metric = metrics.MAE()"],"metadata":{"id":"jO2wR6BsDn78"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":378},"id":"FnsnyX2NFlHo","executionInfo":{"status":"ok","timestamp":1653245109631,"user_tz":-60,"elapsed":8,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"9f1db8a2-ebdc-4d3e-d9e0-48eb757b6e64"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Pipeline (\n"," TransformerUnion (\n"," Select (\n"," clouds\n"," humidity\n"," pressure\n"," temperature\n"," wind\n"," ),\n"," Pipeline (\n"," FuncTransformer (\n"," func=\"get_hour_and_weekday\"\n"," ),\n"," TargetAgg (\n"," by=['station', 'hour', 'weekday']\n"," how=Mean ()\n"," target_name=\"y\"\n"," )\n"," ),\n"," TargetAgg (\n"," by=['station']\n"," how=EWMean (\n"," alpha=0.5\n"," )\n"," target_name=\"y\"\n"," )\n"," ),\n"," StandardScaler (\n"," with_std=True\n"," ),\n"," [LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.01\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," ), LinearRegression (\n"," optimizer=RMSProp (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," rho=0.9\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," ), LinearRegression (\n"," optimizer=Adam (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," beta_1=0.9\n"," beta_2=0.999\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," ), KNNRegressor (\n"," n_neighbors=5\n"," window_size=50\n"," leaf_size=30\n"," p=2\n"," aggregation_method=\"mean\"\n"," )]\n",")"],"text/html":["
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour_and_weekday
\n","def get_hour_and_weekday(x):\n"," x['weekday'] = (x['moment'].weekday() < 6) # Mon:0,Tue:1,...,Sat:6,Sun:7\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour_and_weekday
\n","{'_feature_name': 'y_mean_by_station_and_hour_and_weekday',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour', 'weekday'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
y_ewm_0.5_by_station
\n","{'_feature_name': 'y_ewm_0.5_by_station',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station'],\n"," 'how': EWMean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
EWARegressor
\n","{'data': [LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.01\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=RMSProp (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," rho=0.9\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=Adam (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," beta_1=0.9\n"," beta_2=0.999\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," KNNRegressor (\n"," n_neighbors=5\n"," window_size=50\n"," leaf_size=30\n"," p=2\n"," aggregation_method=\"mean\"\n",")],\n"," 'learning_rate': 0.5,\n"," 'loss': Squared({}),\n"," 'weights': [1.0, 1.0, 1.0, 1.0]}\n","\n","
"]},"metadata":{},"execution_count":40}]},{"cell_type":"code","source":["evaluate.progressive_val_score(\n"," dataset=datasets.Bikes(),\n"," model=model,\n"," metric=metric,\n"," moment='moment',\n"," delay=dt.timedelta(minutes=30),\n"," print_every=20_000\n",")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6FThDFSnFmAa","executionInfo":{"status":"ok","timestamp":1653245307267,"user_tz":-60,"elapsed":191399,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"09d0c56b-eacb-4182-e154-3a28a92b8531"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[20,000] MAE: 2.24487\n","[40,000] MAE: 2.236691\n","[60,000] MAE: 2.267139\n","[80,000] MAE: 2.282768\n","[100,000] MAE: 2.290776\n","[120,000] MAE: 2.271379\n","[140,000] MAE: 2.256938\n","[160,000] MAE: 2.281402\n","[180,000] MAE: 2.284546\n"]},{"output_type":"execute_result","data":{"text/plain":["MAE: 2.288177"]},"metadata":{},"execution_count":41}]},{"cell_type":"code","source":[""],"metadata":{"id":"Uz6_orOTFnht"},"execution_count":null,"outputs":[]}]}