Issue
I've been trying to figure this out for quite a while, but can't seem to get it. I have the following DataFrame, `shot_df`, that consists of the x and y coordinates of each basketball shot and whether or not the shot was made. Here is a sample of my data:
LOC_X LOC_Y SHOT_MADE_FLAG
0 -228 85 1
1 206 147 0
2 30 99 0
3 0 0 1
4 68 250 0
5 99 89 0
6 3 3 1
7 -3 1 1
8 -19 259 0
9 226 68 0
10 14 25 0
11 0 0 1
12 -166 183 1
13 24 3 1
14 23 8 1
15 160 187 0
16 28 46 1
17 -160 208 1
18 101 180 0
19 -23 43 1
I have the following code, shown below, that creates a hexbin plot of the data points (ignore the basketball court; that code isn't needed, as it is just for aesthetics). The hexagons are sized by the frequency of data points within each bin:
import matplotlib.pyplot as plt
# Frequency hexbin of every shot location; colour encodes the count per bin.
fig, ax = plt.subplots(figsize=(12, 10))
hexbin = plt.hexbin(
    shot_df.LOC_X,
    shot_df.LOC_Y,
    gridsize=10,
    cmap='RdYlBu_r',
    edgecolors='black',
    mincnt=0,
)

# Fix the view to the court area. The y limits are passed high-to-low,
# which draws the y axis inverted.
plt.xlim(-250, 250)
plt.ylim(422.5, -47.5)

# Hide both axes (ticks and labels) so only the hexagons remain.
gca = plt.gca()
for axis in (gca.axes.get_xaxis(), gca.axes.get_yaxis()):
    axis.set_visible(False)

plt.colorbar()
#size bins by frequency
from matplotlib.collections import PatchCollection
from matplotlib.path import Path
from matplotlib.patches import PathPatch
def sized_hexbin(ax, hc):
    """Replace a hexbin collection with hexagons scaled by their bin value.

    ax -- axes that receives the rescaled hexagons.
    hc -- collection returned by ``hexbin``; it is removed from its axes
          after the replacement collection has been added.

    Each hexagon is shrunk by ``value / max(value)`` around its bin centre,
    so the bin with the largest value keeps the full size. The colouring
    still uses the same bin values and the 'RdYlBu_r' colormap.
    """
    centers = hc.get_offsets()
    # The first path of the collection serves as the shape template for
    # every bin; only its scale and position change.
    template = hc.get_paths()[0]
    base_vertices = template.vertices
    counts = hc.get_array()
    largest = counts.max()

    scaled_hexagons = [
        PathPatch(Path(base_vertices * count / largest + center, template.codes))
        for center, count in zip(centers, counts)
    ]

    collection = PatchCollection(
        scaled_hexagons, cmap='RdYlBu_r', edgecolors='black')
    collection.set_array(counts)
    ax.add_collection(collection)
    hc.remove()


sized_hexbin(ax, hexbin)
Now, the last thing I want to do is change the color to the average of `SHOT_MADE_FLAG`. That is, I want the hexagons to be colored by the average of each bin. That way, the size can indicate how many shots are taken from each hex, and the color can indicate how often a shot from that hex is made.
The way I have it currently set up, both color and size depict frequency, which is redundant. Again, I would like to change the color of a bin to the average of `SHOT_MADE_FLAG` within that bin.
Thanks!
Solution
This is not exactly perfect, and there's quite a bit going on here. I can't take all the credit, as I followed along with a tutorial, and I would suggest you do the same. It's very good and will help you, but I'm posting what I've got so far to show you the "rough draft".
So refer to these links:
Function to draw court is here:
from matplotlib.patches import Circle, Rectangle, Arc
def draw_court(ax=None, color='black', lw=2, outer_lines=False):
    """Add the line markings of an NBA half court to a Matplotlib axes.

    Coordinates are in tenths of a foot (a 16 ft wide box is 160 units),
    with the centre of the hoop at (0, 0).

    ax          -- axes to draw on; the current axes is used when None.
    color       -- colour applied to every court element.
    lw          -- line width applied to every court element.
    outer_lines -- when true, also draw the half-court line, baseline and
                   side out-of-bounds lines as one rectangle.

    Returns the axes the patches were added to.
    """
    # Fall back to the current axes when the caller did not supply one.
    if ax is None:
        ax = plt.gca()

    # Every element shares the same stroke settings.
    stroke = {'linewidth': lw, 'color': color}

    court_elements = [
        # Hoop: 18" diameter, i.e. a 9" radius, which is 7.5 units here.
        Circle((0, 0), radius=7.5, fill=False, **stroke),
        # Backboard
        Rectangle((-30, -7.5), 60, -1, **stroke),
        # The paint: outer box, width=16ft, height=19ft
        Rectangle((-80, -47.5), 160, 190, fill=False, **stroke),
        # The paint: inner box, width=12ft, height=19ft
        Rectangle((-60, -47.5), 120, 190, fill=False, **stroke),
        # Free throw circle, top half (solid)
        Arc((0, 142.5), 120, 120, theta1=0, theta2=180, fill=False,
            **stroke),
        # Free throw circle, bottom half (dashed)
        Arc((0, 142.5), 120, 120, theta1=180, theta2=0,
            linestyle='dashed', **stroke),
        # Restricted zone: arc with a 4ft radius around the hoop centre
        Arc((0, 0), 80, 80, theta1=0, theta2=180, **stroke),
        # Corner three-point lines: 14ft long before the arc begins
        Rectangle((-220, -47.5), 0, 140, **stroke),
        Rectangle((220, -47.5), 0, 140, **stroke),
        # Three-point arc, centred on the hoop and 23'9" away from it.
        # The theta values were tuned by eye until the arc met the corner
        # lines.
        Arc((0, 0), 475, 475, theta1=22, theta2=158, **stroke),
        # Centre court arcs
        Arc((0, 422.5), 120, 120, theta1=180, theta2=0, **stroke),
        Arc((0, 422.5), 40, 40, theta1=180, theta2=0, **stroke),
    ]

    if outer_lines:
        # Half-court line, baseline and side out-of-bounds lines
        boundary = Rectangle((-250, -47.5), 500, 470, fill=False, **stroke)
        court_elements.append(boundary)

    # Put every court element onto the axes, in list order.
    for element in court_elements:
        ax.add_patch(element)

    return ax
Sample data:
import pandas as pd
# Sample shot log, one row per attempt: court x, court y, and whether the
# shot was made (1) or not (0). Identical rows are separate attempts from
# the same spot; row order is kept as in the original listing.
shots_df = pd.DataFrame(
    [
        (-228, 85, 1),
        (206, 147, 0),
        (30, 99, 0),
        (0, 0, 1), (0, 0, 1), (0, 0, 1), (0, 0, 1), (0, 0, 1),
        (0, 0, 0), (0, 0, 0),
        (68, 250, 0),
        (99, 89, 0),
        (99, 89, 1), (99, 89, 1), (99, 89, 1),
        (99, 89, 0), (99, 89, 0),
        (3, 3, 1),
        (-3, 1, 1),
        (-19, 259, 0),
        (226, 68, 0), (226, 68, 0), (226, 68, 0),
        (226, 68, 0), (226, 68, 0), (226, 68, 0),
        (226, 68, 0), (226, 68, 0), (226, 68, 0),
        (14, 25, 0),
        (0, 0, 1),
        (-166, 183, 1),
        (24, 3, 1),
        (23, 8, 1),
        (160, 187, 0),
        (28, 46, 1),
        (-160, 208, 1),
        (101, 180, 0),
        (-23, 43, 1),
    ],
    columns=['LOC_X', 'LOC_Y', 'SHOT_MADE_FLAG'],
)
Code for plot:
import matplotlib.pyplot as plt
import numpy as np
# Hexagonal shot chart: marker size shows how often shots come from a bin,
# marker colour shows the percentage of those shots that were made.

# Bins with fewer attempts than this are coloured as 0 %.
min_shots = 1

# One fixed grid for both binnings below, so the "all shots" and "made
# shots" arrays line up element by element.
# extent is (xmin, xmax, ymin, ymax) and must be in increasing order:
# recent Matplotlib releases raise ValueError when ymin > ymax.
court_extent = (-250, 250, -47.5, 422.5)
grid_size = 10

shots_df = shots_df.rename(columns={'LOC_X': 'original_x',
                                    'LOC_Y': 'original_y',
                                    'SHOT_MADE_FLAG': 'shot_made'})

# hexbin is used only for its binning; the figure it draws is discarded.
shots_hex = plt.hexbin(
    shots_df.original_x, shots_df.original_y,
    extent=court_extent, cmap='Reds', gridsize=grid_size)
shots_by_hex = shots_hex.get_array()
freq_by_hex = shots_by_hex / shots_by_hex.sum()
# Scale so the busiest bin gets a marker size of 960.
sizes = freq_by_hex / freq_by_hex.max() * 960
plt.close()  # this closes the plot window

makes_df = shots_df[shots_df.shot_made == 1]
makes_hex = plt.hexbin(
    makes_df['original_x'], makes_df['original_y'],
    extent=court_extent, cmap='Reds', gridsize=grid_size)
plt.close()

# Made / attempted per bin. Empty bins give 0/0, so silence the warning
# and turn the resulting NaN values into 0.
with np.errstate(divide='ignore', invalid='ignore'):
    pcts_by_hex = makes_hex.get_array() / shots_hex.get_array()
pcts_by_hex[np.isnan(pcts_by_hex)] = 0

# Zero out bins that have too few attempts.
sample_sizes = shots_hex.get_array()
filter_threshold = min_shots
pcts_by_hex[sample_sizes < filter_threshold] = 0

# Bin centres and the colour value (percentage made) for each bin.
hex_centers = shots_hex.get_offsets()
x = hex_centers[:, 0]
y = hex_centers[:, 1]
z = pcts_by_hex * 100

# Kept for the size legend, which converts marker sizes back to frequencies.
max_freq = freq_by_hex.max()
max_size = sizes.max()

plt.figure(figsize=(5 * 2, 4.7 * 2))
# The x limits are passed high-to-low, which draws the x axis reversed.
plt.xlim(250, -250)
plt.ylim(-47.5, 422.5)
scatter = plt.scatter(x, y, c=z, cmap='Reds', marker='h',
                      edgecolors='black', s=sizes)
draw_court(outer_lines=True)

cur_axes = plt.gca()
cur_axes.get_xaxis().set_visible(False)
cur_axes.get_yaxis().set_visible(False)

# Colour legend: percentage of shots made.
legend1 = plt.legend(
    *scatter.legend_elements(num=6, fmt="{x:.0f}%"),
    loc="upper right", title='Shot\nacc', fontsize='small')
# Size legend: share of all shots, converted back from marker size.
legend2 = plt.legend(
    *scatter.legend_elements(
        'sizes', num=6, alpha=0.8, fmt="{x:.1f}%",
        func=lambda s: s / max_size * max_freq * 100),
    loc='upper left', title='Freq (%)', fontsize='small')
# The second plt.legend call replaces the first on the axes, so add the
# first one back as a plain artist to show both.
plt.gca().add_artist(legend1)
Output:
Answered By - chitown88
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.