{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"PS-Regression.ipynb","provenance":[],"authorship_tag":"ABX9TyMUxMMGOEfggrVYjL6jkgP2"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"z-0ODmpU5kdH"},"source":["### The bike sharing forecasting problem (river tutorial)"]},{"cell_type":"code","source":["# Install river into the environment of the running kernel (%pip targets the kernel, !pip may not).\n","%pip install river"],"metadata":{"id":"6zYUgVlK5usm"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from river import datasets\n","from pprint import pprint\n","from river import compose\n","from river import linear_model\n","from river import metrics\n","from river import evaluate\n","from river import preprocessing\n","from river import optim\n","from river import feature_extraction\n","from river import stats\n","from river import ensemble\n","from river import neighbors\n","import datetime as dt"],"metadata":{"id":"t2JoFJ0N53jt"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["## Look at the first entry of the dataset\n","\n","X_y = datasets.Bikes() # Connect to the stream\n","\n","x, y = next(iter(X_y)) # Pull only the first (features, target) pair from the stream\n","pprint(x)\n","print(f'Number of available bikes: {y}') # <- We want to predict the number of available bikes\n","print(f'Number of features: {len(x)}')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PqiDft-N57bi","executionInfo":{"status":"ok","timestamp":1653242232262,"user_tz":-60,"elapsed":406,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"2cd1ab8c-6f17-44eb-e2ec-30c56c59af84"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["{'clouds': 75,\n"," 'description': 'light rain',\n"," 'humidity': 81,\n"," 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),\n"," 'pressure': 1017.0,\n"," 'station': 'metro-canal-du-midi',\n"," 'temperature': 6.54,\n"," 'wind': 
9.3}\n","Number of available bikes: 1\n","Number of features: 8\n"]}]},{"cell_type":"code","source":["# Numeric weather features -> standardisation -> linear regression fitted by SGD (learning rate 1e-3).\n","model = (\n","    compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n","    | preprocessing.StandardScaler()\n","    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))\n",")\n","\n","# Mean absolute error is the evaluation metric.\n","metric = metrics.MAE()"],"metadata":{"id":"qlsJ3q5w6LZY"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":260},"id":"G2a7Gzpt7ye9","executionInfo":{"status":"ok","timestamp":1653242542484,"user_tz":-60,"elapsed":372,"user":{"displayName":"Claudia Soares","userId":"01695063174396797696"}},"outputId":"eb99e80a-d10e-475c-f47d-a68a092db979"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Pipeline (\n"," Select (\n"," clouds\n"," humidity\n"," pressure\n"," temperature\n"," wind\n"," ),\n"," StandardScaler (\n"," with_std=True\n"," ),\n"," LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.001\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n"," )\n",")"],"text/html":["
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
StandardScaler
\n","{'counts': Counter({'pressure': 182470,\n"," 'clouds': 182470,\n"," 'temperature': 182470,\n"," 'wind': 182470,\n"," 'humidity': 182470}),\n"," 'means': defaultdict(<class 'float'>,\n"," {'clouds': 30.315131254453505,\n"," 'humidity': 62.24244533347998,\n"," 'pressure': 1017.0563060996391,\n"," 'temperature': 20.50980692716619,\n"," 'wind': 3.4184331122924543}),\n"," 'vars': defaultdict(<class 'float'>,\n"," {'clouds': 1389.0025610928221,\n"," 'humidity': 349.59967918503554,\n"," 'pressure': 33.298307526514115,\n"," 'temperature': 34.70701720774977,\n"," 'wind': 4.473627075744674}),\n"," 'with_std': True}\n","\n","
LinearRegression
\n","{'_weights': {'pressure': 7.775241118066252, 'clouds': -0.9309940243402086, 'temperature': 1.3677842059050738, 'wind': -0.1680275080383196, 'humidity': 7.363588933751584},\n"," '_y_name': None,\n"," 'clip_gradient': 1000000000000.0,\n"," 'initializer': Zeros (),\n"," 'intercept': 5.101412976082054,\n"," 'intercept_init': 0.0,\n"," 'intercept_lr': Constant({'learning_rate': 0.01}),\n"," 'l2': 0.0,\n"," 'loss': Squared({}),\n"," 'optimizer': SGD({'lr': Constant({'learning_rate': 0.001}), 'n_iterations': 182470})}\n","\n","
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour
\n","def get_hour(x):\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour
\n","{'_feature_name': 'y_mean_by_station_and_hour',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
LinearRegression
\n","{'_weights': {},\n"," '_y_name': None,\n"," 'clip_gradient': 1000000000000.0,\n"," 'initializer': Zeros (),\n"," 'intercept': 0.0,\n"," 'intercept_init': 0.0,\n"," 'intercept_lr': Constant({'learning_rate': 0.01}),\n"," 'l2': 0.0,\n"," 'loss': Squared({}),\n"," 'optimizer': SGD({'lr': Constant({'learning_rate': 0.001}), 'n_iterations': 0})}\n","\n","
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour_and_weekday
\n","def get_hour_and_weekday(x):\n"," x['weekday'] = (x['moment'].weekday() < 6) # Mon:0,Tue:1,...,Sat:6,Sun:7\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour_and_weekday
\n","{'_feature_name': 'y_mean_by_station_and_hour_and_weekday',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour', 'weekday'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
LinearRegression
\n","{'_weights': {},\n"," '_y_name': None,\n"," 'clip_gradient': 1000000000000.0,\n"," 'initializer': Zeros (),\n"," 'intercept': 0.0,\n"," 'intercept_init': 0.0,\n"," 'intercept_lr': Constant({'learning_rate': 0.01}),\n"," 'l2': 0.0,\n"," 'loss': Squared({}),\n"," 'optimizer': SGD({'lr': Constant({'learning_rate': 0.001}), 'n_iterations': 0})}\n","\n","
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour_and_weekday
\n","def get_hour_and_weekday(x):\n"," x['weekday'] = (x['moment'].weekday() < 6) # Mon:0,Tue:1,...,Sat:6,Sun:7\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour_and_weekday
\n","{'_feature_name': 'y_mean_by_station_and_hour_and_weekday',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour', 'weekday'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
y_ewm_0.5_by_station
\n","{'_feature_name': 'y_ewm_0.5_by_station',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station'],\n"," 'how': EWMean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
EWARegressor
\n","{'data': [LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.01\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=RMSProp (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," rho=0.9\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=Adam (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," beta_1=0.9\n"," beta_2=0.999\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n",")],\n"," 'learning_rate': 0.5,\n"," 'loss': Squared({}),\n"," 'weights': [1.0, 1.0, 1.0]}\n","\n","
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
\n","{'keys': {'pressure', 'clouds', 'temperature', 'wind', 'humidity'}}\n","\n","
get_hour_and_weekday
\n","def get_hour_and_weekday(x):\n"," x['weekday'] = (x['moment'].weekday() < 6) # Mon:0,Tue:1,...,Sat:6,Sun:7\n"," x['hour'] = x['moment'].hour\n"," return x\n","\n","
y_mean_by_station_and_hour_and_weekday
\n","{'_feature_name': 'y_mean_by_station_and_hour_and_weekday',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station', 'hour', 'weekday'],\n"," 'how': Mean: 0.,\n"," 'on': 'y'}\n","\n","
y_ewm_0.5_by_station
\n","{'_feature_name': 'y_ewm_0.5_by_station',\n"," '_groups': defaultdict(..., {}),\n"," 'by': ['station'],\n"," 'how': EWMean: 0.,\n"," 'on': 'y'}\n","\n","
StandardScaler
\n","{'counts': Counter(),\n"," 'means': defaultdict(<class 'float'>, {}),\n"," 'vars': defaultdict(<class 'float'>, {}),\n"," 'with_std': True}\n","\n","
EWARegressor
\n","{'data': [LinearRegression (\n"," optimizer=SGD (\n"," lr=Constant (\n"," learning_rate=0.01\n"," )\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=RMSProp (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," rho=0.9\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," LinearRegression (\n"," optimizer=Adam (\n"," lr=Constant (\n"," learning_rate=0.1\n"," )\n"," beta_1=0.9\n"," beta_2=0.999\n"," eps=1e-08\n"," )\n"," loss=Squared ()\n"," l2=0.\n"," intercept_init=0.\n"," intercept_lr=Constant (\n"," learning_rate=0.01\n"," )\n"," clip_gradient=1e+12\n"," initializer=Zeros ()\n","),\n"," KNNRegressor (\n"," n_neighbors=5\n"," window_size=50\n"," leaf_size=30\n"," p=2\n"," aggregation_method=\"mean\"\n",")],\n"," 'learning_rate': 0.5,\n"," 'loss': Squared({}),\n"," 'weights': [1.0, 1.0, 1.0, 1.0]}\n","\n","