-
Notifications
You must be signed in to change notification settings - Fork 21
/
SportsScience_EPV.py
122 lines (96 loc) · 5.37 KB
/
SportsScience_EPV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
'''
Credits to Laurie Shaw's tutorial on Friends of Tracking: https://github.com/Friends-of-Tracking-Data-FoTD/LaurieOnTracking
'''
import Metrica_IO as mio
import Metrica_Viz as mviz
import Metrica_Velocities as mvel
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import Metrica_PitchControl as mpc
import Metrica_EPV as mepv
import sklearn
from sklearn import linear_model
# Location of the Metrica sample data and the match to analyse.
DATADIR = 'C:/Users/sgopaladesikan/PycharmProjects/MMoF/Metrica/data'
game_id = 2  # let's look at sample match 2

# read in the event data
events = mio.read_event_data(DATADIR, game_id)

# read in tracking data
tracking_home = mio.tracking_data(DATADIR, game_id, 'Home')
tracking_away = mio.tracking_data(DATADIR, game_id, 'Away')

# Convert positions from metrica units to meters (note change in Metrica's coordinate system since the last lesson)
tracking_home = mio.to_metric_coordinates(tracking_home)
tracking_away = mio.to_metric_coordinates(tracking_away)
events = mio.to_metric_coordinates(events)

# reverse direction of play in the second half so that home team is always attacking from right->left
tracking_home, tracking_away, events = mio.to_single_playing_direction(tracking_home, tracking_away, events)

# Goalkeeper identifiers, home first then away (this order is what is passed on to the EPV code).
GK_numbers = [mio.find_goalkeeper(tracking_home), mio.find_goalkeeper(tracking_away)]
home_attack_direction = mio.find_playing_direction(tracking_home, 'Home')  # 1 if shooting left-right, else -1

# Set some global variables
# Column names with their last two characters stripped, e.g. 'Home_11_x' -> 'Home_11'
# (assumes the position columns end in '_x' / '_y' at this point -- confirm against Metrica_IO).
player_ids = np.unique([c[:-2] for c in tracking_home.columns if c[:4] in ['Home', 'Away']])
maxspeed = 12
dt = tracking_home['Time [s]'].diff()
# Row label of the first frame at which Period reaches its maximum value
# (presumably the first frame of the second half). Series.idxmax() must be
# called without a positional axis: passing 2 raises ValueError on current
# pandas because a Series only has axis 0.
second_half_idx = tracking_home.Period.idxmax()

# Using Laurie's smoothing code
tracking_home = mvel.calc_player_velocities(tracking_home, smoothing=True)
tracking_away = mvel.calc_player_velocities(tracking_away, smoothing=True)

# Obtain the Unique Players: the second '_'-separated token of each team column,
# e.g. 'Home_11_speed' -> '11'.
home_players = np.unique([c.split('_')[1] for c in tracking_home.columns if c[:4] == 'Home'])
away_players = np.unique([c.split('_')[1] for c in tracking_away.columns if c[:4] == 'Away'])
# Calculate these measures while in possession and out of possession
# Calculate the physical metrics of high or low EPV possessions (calculate each possession)
params = mpc.default_model_params()
EPV = mepv.load_EPV_grid(DATADIR + '/EPV_grid.csv')
mviz.plot_EPV(EPV, field_dimen=(106.0, 68), attack_direction=home_attack_direction)


def _tracking_window(tracking, start_time, end_time, periods):
    """Return the rows of `tracking` with start_time <= 'Time [s]' <= end_time and 'Period' in `periods`."""
    in_window = (tracking['Time [s]'] >= start_time) & (tracking['Time [s]'] <= end_time)
    return tracking[in_window & tracking['Period'].isin(periods)]


def _high_speed_distance(tracking, team, players, min_speed=3, frame_rate=25.):
    """Return the summed high-speed distance of `players` over the rows of `tracking`.

    For every player the '<team>_<player>_speed' samples that are >= min_speed
    are summed and scaled by 1 / frame_rate / 1000; the per-player values are
    then added together. Samples that are NaN never satisfy the threshold and
    are therefore ignored.

    NOTE(review): the scaling presumably converts m/s samples at 25 frames per
    second into kilometres -- confirm the frame rate and units of the data.
    """
    per_player = []
    for player in players:
        speed = tracking[team + '_' + player + '_speed']
        per_player.append(speed[speed >= min_speed].sum() / frame_rate / 1000)
    return np.sum(per_player)


# Take a copy so that adding the 'Poss_Seq' column does not write into a slice
# of `events` (avoids pandas' chained-assignment / SettingWithCopy problem).
pass_events = events[events['Type'] == 'PASS'].copy()
# The sequence counter increases every time the passing team differs from the
# team of the previous pass, so consecutive passes by one team share an id.
pass_events['Poss_Seq'] = pass_events['Team'].ne(pass_events['Team'].shift()).cumsum()
home_poss = pass_events[pass_events['Team'] == 'Home']

# One [home distance, away distance, total EEPV added] entry per home possession.
home_poss_list = []
for poss_seq, pass_poss in home_poss.groupby('Poss_Seq'):
    print(poss_seq)
    start_time = pass_poss['Start Time [s]'].min()
    end_time = pass_poss['End Time [s]'].max()
    periods = pass_poss['Period'].unique()

    # Get the total distance of both teams as well as the total EPV
    tracking_poss = _tracking_window(tracking_home, start_time, end_time, periods)
    tracking_opp = _tracking_window(tracking_away, start_time, end_time, periods)
    total_dist = _high_speed_distance(tracking_poss, 'Home', home_players)
    total_opp_dist = _high_speed_distance(tracking_opp, 'Away', away_players)

    # Only the first return value of calculate_epv_added is accumulated; the
    # second one (EPV_diff) is not used here.
    eepv_added = []
    for event_id in pass_poss.index:
        EEPV_added, EPV_diff = mepv.calculate_epv_added(event_id, events, tracking_home, tracking_away,
                                                        GK_numbers, EPV, params)
        eepv_added.append(EEPV_added)
    total_eepv = np.sum(eepv_added)

    home_poss_list.append([total_dist, total_opp_dist, total_eepv])
# One row per home possession. The frame is built directly from the list so
# the number of possessions is not hard-coded (the previous reshape(68, 3)
# only worked when there were exactly 68 home possessions).
home_eepv_df = pd.DataFrame(home_poss_list, columns=['HomeDist', 'AwayDist', 'EEPV'])
home_eepv_df.plot.scatter(x='HomeDist',
                          y='EEPV')

# Linear regression of EEPV on the home team's distance. The estimator is fed
# column vectors, so both series are reshaped to (n, 1) once and reused.
home_dist = np.array(home_eepv_df['HomeDist']).reshape(-1, 1)
y = np.array(home_eepv_df['EEPV']).reshape(-1, 1)
lm = sklearn.linear_model.LinearRegression().fit(home_dist, y)
lm_score = lm.score(home_dist, y)
# Scores noted by the original author (presumably from earlier runs -- provenance not documented):
#0.6397945808713286, 0.6730582132032926, 0.7568845621055171, 0.7869140810900896, 0.752852377695006, 0.6046637586000496
yhat = lm.predict(home_dist)

# Scatter of the possessions with the fitted line on top, annotated with the score.
plt.scatter(home_eepv_df['HomeDist'], home_eepv_df['EEPV'])
plt.plot(home_eepv_df['HomeDist'], yhat, color="red")
plt.title("Total Distance [>= 3m/s]")
plt.annotate(lm_score, xy=(1, .2))

#manually calculate lm_score
#SS_Residual = sum((y-yhat)**2)
#SS_Total = sum((y-np.mean(y))**2)
#r_squared = 1 - (float(SS_Residual))/SS_Total

##Optional: Are the EPV chances statistically better when the team is pacing above average together?