import pandas as pd
import matplotlib.pyplot as plt
import warnings
import statsmodels.api as sm
warnings.filterwarnings("ignore")
%matplotlib inline

# Load the exported workout log. The 'Date' column is parsed as datetimes
# and used as the index.
workout = pd.read_csv(
    'Workout_data.csv',
    parse_dates=['Date'],
    index_col='Date',
)

# Show three randomly sampled rows, then the column / dtype overview.
display(workout.sample(3))
print(workout.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 508 entries, 2018-11-11 14:05:12 to 2012-08-22 18:53:54
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Activity Id               508 non-null    object 
 1   Type                      508 non-null    object 
 2   Route Name                1 non-null      object 
 3   Distance (km)             508 non-null    float64
 4   Duration                  508 non-null    object 
 5   Average Pace              508 non-null    object 
 6   Average Speed (km/h)      508 non-null    float64
 7   Calories Burned           508 non-null    float64
 8   Climb (m)                 508 non-null    int64  
 9   Average Heart Rate (bpm)  294 non-null    float64
 10  Friend's Tagged           0 non-null      float64
 11  Notes                     231 non-null    object 
 12  GPX File                  504 non-null    object 
dtypes: float64(5), int64(1), object(7)
memory usage: 55.6+ KB
None


# Columns removed from the frame before the analysis.
cols = [
    'Notes',
    'Calories Burned',
    'GPX File',
    'Activity Id',
    "Friend's Tagged",
    'Route Name',
]
workout.drop(columns=cols, inplace=True)

# Show how many activities of each type were recorded (before relabelling).
display(workout['Type'].value_counts())

# Relabel the "Other" activity type as "Unicycling".
workout['Type'] = workout['Type'].str.replace("Other", "Unicycling")

# Count the remaining missing values per column.
workout.isna().sum()

Running    459
Cycling     29
Walking     18
Other        2
Name: Type, dtype: int64

Type                          0
Distance (km)                 0
Duration                      0
Average Pace                  0
Average Speed (km/h)          0
Climb (m)                     0
Average Heart Rate (bpm)    214
dtype: int64


# Mean recorded heart rate per activity type (NaN entries are skipped by mean()).
cycle_avg = workout[workout['Type'] == 'Cycling']['Average Heart Rate (bpm)'].mean()
run_avg = workout[workout['Type'] == 'Running']['Average Heart Rate (bpm)'].mean()

# Independent per-type copies so the imputation below does not touch `workout`.
walk = workout[workout['Type'] == 'Walking'].copy()
cycle = workout[workout['Type'] == 'Cycling'].copy()
run = workout[workout['Type'] == 'Running'].copy()

# Fill missing heart rates: runs and rides get their type's mean truncated to
# an integer, walks get a fixed 110 bpm.
# The result is assigned back explicitly instead of calling
# `frame[col].fillna(..., inplace=True)`: that chained in-place form operates
# on an intermediate Series and is not guaranteed to update the frame.
run['Average Heart Rate (bpm)'] = run['Average Heart Rate (bpm)'].fillna(int(run_avg))
cycle['Average Heart Rate (bpm)'] = cycle['Average Heart Rate (bpm)'].fillna(int(cycle_avg))
walk['Average Heart Rate (bpm)'] = walk['Average Heart Rate (bpm)'].fillna(110)

# Confirm no missing values remain in the running data.
run.isnull().sum()

Type                        0
Distance (km)               0
Duration                    0
Average Pace                0
Average Speed (km/h)        0
Climb (m)                   0
Average Heart Rate (bpm)    0
dtype: int64


# Runs between the two date bounds (written newest-first, as in the other
# slices of this notebook).
run_13_18 = run['20190101':'20130101']

# Draw every column in its own panel as unconnected points.
point_style = dict(linestyle='none', marker='o', markersize=3)
run_13_18.plot(
    subplots=True,
    sharex=False,
    figsize=(12, 16),
    **point_style,
)

plt.show()


# Runs between the two date bounds (written newest-first).
run_15_18 = run['20190101':'20150101']

# Average only the numeric columns. The frame also holds text columns
# ('Type', 'Duration', 'Average Pace') which cannot be averaged; recent pandas
# raises on them instead of silently dropping them, so they are excluded
# explicitly before resampling.
run_15_18_numeric = run_15_18.select_dtypes(include='number')

print('Running last 4 years average:')
# Yearly bins -> mean of each numeric column per year.
display(run_15_18_numeric.resample('A').mean())

print('Last 4 years weekly average:')
# Weekly bins -> mean per week, then the mean of those weekly means.
display(run_15_18_numeric.resample('W').mean().mean())

# Number of recorded runs per weekly bin, averaged over all weeks.
weekly_training = run_15_18['Distance (km)'].resample('W').count().mean()
print('Average trainings per week:', weekly_training)

Running last 4 years average:

Last 4 years weekly average:

Distance (km)                12.518176
Average Speed (km/h)         10.835473
Climb (m)                   158.325444
Average Heart Rate (bpm)    144.801775
dtype: float64

Average trainings per week: 1.5


# Series to plot: distance and average heart rate of the 2015-2018 runs.
run_dist = run_15_18['Distance (km)']
run_hr = run_15_18['Average Heart Rate (bpm)']

# Two stacked panels sharing the date axis.
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True, figsize=(12, 8))

# Styling shared by the horizontal mean line drawn on each panel.
mean_line = dict(color='blue', linewidth=1, linestyle='-.')

# Top panel: distance with its mean.
run_dist.plot(ax=ax1)
ax1.set(title='Historical data with averages', ylabel='Distance (km)')
ax1.axhline(run_dist.mean(), **mean_line)

# Bottom panel: heart rate with its mean.
run_hr.plot(ax=ax2, color='red')
ax2.set(ylabel='Average Heart Rate (bpm)', xlabel='Date')
ax2.axhline(run_hr.mean(), **mean_line)

plt.show()


# Total running distance per calendar year.
run_annual = run['2018':'2013']['Distance (km)'].resample('A').sum()

fig = plt.figure(figsize=(15.0, 6.0))

# One star marker per year, no connecting line.
ax = run_annual.plot(marker='*', markersize=14, linewidth=0, color='#4361EE')
ax.set(
    title='Annual totals for distance',
    xlabel='Years',
    ylabel='Distance (km)',
    xlim=['2012', '2019'],
    ylim=[0, 1210],
)

# Shaded horizontal bands: (lower bound, upper bound, fill colour).
for band_low, band_high, band_color in [
    (1000, 1210, '#CAFFBF'),
    (800, 1000, '#FDFFB6'),
    (0, 800, '#FFADAD'),
]:
    ax.axhspan(band_low, band_high, color=band_color, alpha=0.8)

plt.show()


# Weekly distance series; weeks without a run are back-filled from the next
# available observation.
run_dist_wk = run['2018':'2013']['Distance (km)'].resample('W').bfill()

# Decompose with a 52-sample seasonal period (one year of weekly bins).
# `period=` replaces the former `freq=` keyword, which current statsmodels
# releases no longer accept.
x = sm.tsa.seasonal_decompose(run_dist_wk, extrapolate_trend=1, period=52)

# Overlay the extracted trend on the observed weekly series.
fig = plt.figure(figsize=(12, 5))
ax = x.trend.plot(label='Trend', linewidth=2)
ax = x.observed.plot(label='Observed', linewidth=0.5, color='#4361EE')
ax.legend()
ax.set(xlabel='Years',
       ylabel='Distance (km)')
ax.set_title('Running distance trend')
plt.show()


# Histogram bin edges for heart rate, with one label and one colour per bin.
zone = [100, 125, 133, 142, 151, 173]
name = ['Easy', 'Moderate', 'Hard', 'Expert', 'God Like']
color = ['green', 'yellow', 'orange', 'tomato', 'red']

# Heart-rate values of the runs inside the selected date slice.
run_hr = run['2018':'2015 3']['Average Heart Rate (bpm)']

fig, ax = plt.subplots(figsize=(8, 5))
n, bins, patches = ax.hist(run_hr, bins=zone, alpha=0.5)

# Paint each bar with the colour of its zone.
for bar, bar_color in zip(patches, color):
    bar.set_facecolor(bar_color)

ax.set(ylabel='Number of runs', title='Distribution of HR')

# Put a tick at the left edge of every bin and label it with the zone name.
ax.xaxis.set(ticks=zone[:5])
ax.set_xticklabels(labels=name, rotation=-30, ha='left')

plt.show()


# Recombine the three per-type frames, newest activity first.
# Uses the `run` frame built earlier (the original referenced an undefined
# `df_run`) and `pd.concat` in place of `DataFrame.append`, which is no longer
# available in current pandas.
run_walk_cycle = pd.concat([run, walk, cycle]).sort_index(ascending=False)

# Columns that get a per-type total, and the column that only gets statistics.
dist_climb, speed_col = ['Distance (km)', 'Climb (m)'], ['Average Speed (km/h)']
totals = run_walk_cycle.groupby('Type')[dist_climb].sum()

print('Training types totals:')
display(totals)

# Descriptive statistics per activity type, with the totals added as an extra
# 'total' entry for the distance and climb columns.
summary = run_walk_cycle.groupby('Type')[dist_climb + speed_col].describe()
for i in dist_climb:
    summary[i, 'total'] = totals[i]
print('Training types statistics summary:')
# Move the statistic names from the columns into the row index for display.
summary.stack()

Training types totals:

Training types statistics summary:

	Activity Id	Type	Route Name	Distance (km)	Duration	Average Pace	Average Speed (km/h)	Calories Burned	Climb (m)	Average Heart Rate (bpm)	Friend's Tagged	Notes	GPX File
Date
2013-04-13 08:51:29	2c971ecf-efdd-4a3d-be67-ba249aa5557a	Running	NaN	6.34	33:02	5:13	11.51	454.0	33	NaN	NaN	NaN	2013-04-13-085129.gpx
2016-12-06 18:36:37	159d92f4-c96b-43ea-88a3-8f1060d170ed	Running	NaN	6.99	41:34	5:57	10.08	500.0	49	139.0	NaN	TomTom MySports Watch	2016-12-06-183637.gpx
2013-07-19 18:40:13	0547ce8b-c7c8-40b6-bb71-feff6298c5be	Running	NaN	1.72	8:33	4:58	12.07	40.0	6	NaN	NaN	NaN	2013-07-19-184013.gpx

	Distance (km)	Average Speed (km/h)	Climb (m)	Average Heart Rate (bpm)
Date
2015-12-31	13.602805	10.998902	160.170732	143.353659
2016-12-31	11.411667	10.837778	133.194444	143.388889
2017-12-31	12.935176	10.959059	169.376471	145.247059
2018-12-31	13.339063	10.777969	191.218750	148.125000

		Average Speed (km/h)	Climb (m)	Distance (km)
Type
Cycling	25%	16.980000	139.000000	15.530000
	50%	19.500000	199.000000	20.300000
	75%	21.490000	318.000000	29.400000
	count	29.000000	29.000000	29.000000
	max	24.330000	553.000000	49.180000
	mean	19.125172	240.551724	23.468276
	min	11.380000	58.000000	11.410000
	std	3.257100	128.960289	9.451040
	total	NaN	6976.000000	680.580000
Running	25%	10.495000	54.000000	7.415000
	50%	10.980000	91.000000	10.810000
	75%	11.520000	171.000000	13.190000
	count	459.000000	459.000000	459.000000
	max	20.720000	982.000000	38.320000
	mean	11.056296	124.788671	11.382353
	min	5.770000	0.000000	0.760000
	std	0.953273	103.382177	4.937853
	total	NaN	57278.000000	5224.500000
Walking	25%	5.555000	7.000000	1.385000
	50%	5.970000	10.000000	1.485000
	75%	6.512500	15.500000	1.787500
	count	18.000000	18.000000	18.000000
	max	6.910000	112.000000	4.290000
	mean	5.549444	19.388889	1.858333
	min	1.040000	5.000000	1.220000
	std	1.459309	27.110100	0.880055
	total	NaN	349.000000	33.450000

Physical Activity & Fitness Analysis¶

Introduction¶

Objective¶

Data¶

Data Preprocessing¶

Exploratory Data Analysis¶

Summary¶

Conclusions and Recommendations¶