Analysis for bike journeys on Fremont Bridge
Posted by Mr NotebookRoooky in Notebook
In [32]:
import numpy as np
import pandas as pd
from jupyterthemes import jtplot
Visualization Libraries¶
In [45]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline
import matplotlib.pyplot as plt
# Apply the jupyterthemes 'onedork' plot styling to all figures below
jtplot.style(theme='onedork')
# Alternative built-in matplotlib style, kept commented for reference:
#plt.style.use('seaborn')
Scikit Learn imports¶
In [46]:
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
Importing the data¶
In [47]:
# get_fremont_data is a project-local helper (jupyterworkflow package);
# presumably it returns the Fremont Bridge counter data indexed by timestamp —
# the resample('D'/'W'/...) calls below require a datetime-like index.
# TODO(review): confirm the exact return schema against jupyterworkflow.data.
from jupyterworkflow.data import get_fremont_data
data = get_fremont_data()
Data inspection¶
In [48]:
# Peek at the first rows to sanity-check columns and index
data.head()
Out[48]:
In [49]:
# Per-column summary statistics (count, mean, std, quartiles)
data.describe()
Out[49]:
In [50]:
# Non-null counts per column — quick check for missing data
data.count()
Out[50]:
Using Pandas resample to get the overall picture of the whole data.¶
Notice how just by using the resample we can gain some insight on the data and how the usage of this bridge changes over time. Hourly, Daily, Weekly, Monthly and Annually.¶
In [51]:
# Resample the counts to daily totals and plot them over time
ax = data.resample('D').sum().plot()
ax.set_ylabel('Daily trips')
ax.set_title('Daily Trips VS Date')
Out[51]:
In [52]:
# Weekly totals smooth out the day-to-day noise
weekly_totals = data.resample('W').sum()
ax = weekly_totals.plot()
ax.set_ylabel('Weekly trips')
ax.set_title('Weekly Trips VS Date')
Out[52]:
In [53]:
# Monthly totals highlight the seasonal pattern
monthly_totals = data.resample('M').sum()
ax = monthly_totals.plot()
ax.set_ylabel('Monthly trips')
ax.set_title('Monthly Trips VS Date')
Out[53]:
In [54]:
# Annual totals show the long-term trend in bridge usage
ax = data.resample('Y').sum().plot()
ax.set_ylabel('Annual trips')
ax.set_title('Annual Trips VS Date')
Out[54]:
In [55]:
# Weekly totals for each sidewalk (East vs West) on a shared axes.
# Fix: the original title read 'east & waste' — a typo for 'east & west'.
fig, ax = plt.subplots()
ax.set_title('Weekly Trips VS Date\n east & west')
ax.set_ylabel('Weekly trips')
# Plot both series onto the same axes explicitly via ax=
data['East'].resample('W').sum().plot(ax=ax, legend=True)
data['West'].resample('W').sum().plot(ax=ax, legend=True)
Out[55]:
In [56]:
# Hourly totals for each sidewalk on one shared axes
fig, ax = plt.subplots()
ax.set_ylabel('Hourly trips')
ax.set_title('Hourly Trips VS Date')
data['East'].resample('H').sum().plot(ax=ax, legend=True)
data['West'].resample('H').sum().plot(ax=ax, legend=True)
Out[56]:
In [57]:
# Combined traffic over both sidewalks; later cells rely on this column
data['Total'] = data['West'] + data['East']
# 365-day rolling sum of daily totals — a smoothed annualised trend
daily_totals = data.resample('D').sum()
ax = daily_totals.rolling(365).sum().plot();
ax.set_ylim(0, None);
In [58]:
# Average traffic at each time of day, aggregated across all dates
mean_by_time = data.groupby(data.index.time).mean()
ax = mean_by_time.plot()
ax.set_ylabel('Number of trips at a given time')
ax.set_title('Number of trips VS Time of the day')
Out[58]:
In [59]:
# Re-inspect the frame now that the 'Total' column has been added
data.head()
Out[59]:
In [60]:
# Reshape so rows = time of day, columns = calendar date, values = total trips
pivoted = data.pivot_table(values='Total', index=data.index.time,
                           columns=data.index.date)
# Show a small corner of the pivoted table
pivoted.iloc[:10, :5]
Out[60]:
In [61]:
# One line per calendar day — far too many lines for a legend
pivoted.plot(legend = False)
Out[61]:
In [62]:
# First 24 entries of the time-of-day row index
pivoted.index[:24]
Out[62]:
In [63]:
# Inspect the raw index — datetime-like, since the resample calls above work
data.index
Out[63]:
Using Scikit-Learn to do further analysis of the data.¶
In [64]:
# Feature matrix: one row per date, one column per time of day;
# missing counts are filled with zero before transposing
x = pivoted.fillna(0).transpose().values
x.shape
Out[64]:
In [65]:
# Treat each day's profile as one observation and project it onto its
# first two principal components (PCA) for 2-D visualisation
x2 = PCA(2, svd_solver='full').fit_transform(x)
In [66]:
# Scatter the 2-D PCA projection of the day profiles
plt.scatter(x2[:,0],x2[:,1])
Out[66]:
Using the Gaussian mixture model to identify where the days fall¶
In [67]:
# Fit a 2-component Gaussian mixture to the day profiles and label each day.
# Fix: GaussianMixture initialisation is stochastic; without random_state the
# cluster assignment (and even which cluster is 0 vs 1) changes on every
# Restart & Run All. Pin it so the analysis below is reproducible.
gmm = GaussianMixture(2, random_state=0)
gmm.fit(x)
labels = gmm.predict(x)
labels
Out[67]:
In [68]:
# Colour the PCA projection by the GMM cluster assignment
plt.scatter(x2[:,0],x2[:,1], c=labels, cmap='rainbow')
Out[68]:
Commuting days vs Non commuting days¶
In [69]:
# Plot the daily profiles of each GMM cluster in its own figure;
# selecting columns by boolean mask is equivalent to the .T[mask].T round-trip
pivoted.loc[:, labels == 0].plot(legend=False, alpha=0.20)
pivoted.loc[:, labels == 1].plot(legend=False, alpha=0.20)
Out[69]:
In [70]:
### The
# NOTE(review): this cell contains only the unfinished comment above —
# complete the thought in a markdown cell or delete this cell.
In [71]:
# Weekday number (Mon=0 ... Sun=6) for every pivoted column (calendar date)
daysofweek = pd.to_datetime(pivoted.columns).dayofweek
In [72]:
# Colour the PCA projection by day of week to compare with the GMM clusters
plt.scatter(x2[:,0],x2[:,1], c=daysofweek, cmap='rainbow')
cb = plt.colorbar(ticks=range(7))
cb.set_ticklabels(['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun'])
Zooming in on the days of the week that happen to land on the non-commuter days to see what's happening¶
In [73]:
# Weekdays (Mon-Fri) that the mixture model nevertheless assigned to
# cluster 0 — presumably the "non-commute" cluster; verify against the
# cluster plots above, since GMM label numbering is arbitrary
dates = pd.DatetimeIndex(pivoted.columns)
is_weekday_outlier = (labels == 0) & (daysofweek < 5)
dates[is_weekday_outlier]
Out[73]:
In [74]:
#
In [ ]:
In [ ]:
In [ ]:
In [ ]: