Issue
I've created an LP solver in a Jupyter notebook that is giving me some issues. Specifically, when I run the last line of code in the script below, I get an error message saying: "The kernel appears to have died. It will restart automatically."
Edit: the final dataframe, `dfs_proj`, is a 240-row, 5-column dataframe.
import pandas as pd
from pulp import *
from pulp import LpMaximize
# Load the projections file and give every row a starting count of 1.
dfs_proj = pd.read_csv("4for4_dfs_projections_120321.csv")
dfs_proj['count'] = 1
# Keep only the columns that the code below reads.
cols = ['Player', 'Pos', 'FFPts', 'DK ($)', 'count']
dfs_proj = dfs_proj[cols]
# Keep rows whose 'DK ($)' is at least 4000, plus every DEF and TE row
# regardless of that value.
dfs_proj = dfs_proj[(dfs_proj['DK ($)'] >= 4000) | (dfs_proj['Pos'] == "DEF") | (dfs_proj['Pos'] == "TE")]
# Player -> count lookup. get_optimals builds its own dict of the same shape
# from the dataframe it is given.
player_dict = dict(zip(dfs_proj['Player'], dfs_proj['count']))
# create a helper function to return the number of players assigned each position
def get_position_sum(player_vars, df, position):
    """Return ``pulp.lpSum`` of one term per row of ``df``.

    The term for row ``i`` is ``player_vars[i]`` multiplied by the boolean
    ``position in df['Pos'].iloc[i]``.
    """
    # NOTE(review): get_optimals appends (player_name, LpVariable) tuples to
    # player_vars, so player_vars[i] here is the whole tuple, not the variable.
    return pulp.lpSum([player_vars[i] * (position in df['Pos'].iloc[i]) for i in range(len(df))])
def get_optimals(site, data, num_lineups, optimize_on='FFPts'):
    """
    Generates x number of optimal lineups, based on the column to
    designate as the one to optimize on.
    :param str site: DK or FD. Used for salary constraints
    :param pd.DataFrame data: Pandas dataframe containing projections.
    :param int num_lineups: Number of lineups to generate.
    :param str optimize_on: Name of column in dataframe to use when optimizing
    :return: list of dicts, one per lineup, with keys 'G1', 'G2', ... for the
        selected players and 'Total Points' for the objective value.

    NOTE(review): ``site`` is never read in the body below.
    """
    #global lineups
    lineups = []
    # Per-player counter, seeded from the 'count' column.
    player_dict = dict(zip(data['Player'], data['count']))
    for i in range(1, num_lineups+1):
        prob = pulp.LpProblem('DK_NFL_weekly', pulp.const.LpMaximize)
        # One binary variable per row, stored as (player_name, variable).
        player_vars = []
        for row in data.itertuples():
            var = pulp.LpVariable(f'{row.Player}', cat='Binary')
            player_vars.append((row.Player, var))
        # total assigned players constraint
        # NOTE(review): player_vars holds (name, variable) tuples, so this
        # sums the tuples themselves rather than the variables.
        prob += pulp.lpSum(player_var for player_var in player_vars) == 9
        # total salary constraint
        prob += pulp.lpSum(data['DK ($)'].iloc[i] * player_vars[i][1] for i in range(len(data))) <= 50000
        # for QB and DST, require 1 of each in the lineup
        # NOTE(review): `df` is neither a parameter nor a local of this
        # function; the dataframe passed in is named `data`.
        prob += get_position_sum(player_vars, df, 'QB') == 1
        prob += get_position_sum(player_vars, df, 'DEF') == 1
        # to account for the FLEX position, we allow additional selections of the 3 FLEX-eligible positions: RB, WR, TE
        prob += get_position_sum(player_vars, df, 'RB') >= 2
        prob += get_position_sum(player_vars, df, 'WR') >= 3
        prob += get_position_sum(player_vars, df, 'TE') >= 1
        # From the second lineup on, cap the FFPts total just below the
        # previous objective value so the same solution is not returned again.
        if i > 1:
            if optimize_on == 'Optimal Frequency':
                prob += pulp.lpSum([data['FFPts'].iloc[i] * player_vars[i][1] for i in range(len(data))]) <= (optimal - 0.001)
            else:
                prob += pulp.lpSum([data['FFPts'].iloc[i] * player_vars[i][1] for i in range(len(data))]) <= (optimal - 0.01)
        # Objective: the FFPts total of the selected players.
        prob += pulp.lpSum([data['FFPts'].iloc[i] * player_vars[i][1] for i in range(len(data))])
        # solve and print the status
        # NOTE(review): msg=False is passed and the status is not inspected
        # after solving.
        prob.solve(PULP_CBC_CMD(msg=False))
        optimal = prob.objective.value()
        count = 1
        lineup = {}
        # This loop (and the one further down) rebinds the outer loop's `i`.
        for i in range(len(data)):
            if player_vars[i][1].value() == 1:
                row = data.iloc[i]
                lineup[f'G{count}'] = row['Player']
                count += 1
        lineup['Total Points'] = optimal
        lineups.append(lineup)
        players = list(lineup.values())
        for i in range(0, len(players)):
            # The type check skips the 'Total Points' entry stored in the
            # same dict as the player names.
            if type(players[i]) == str:
                player_dict[players[i]] += 1
                # Once a player's counter reaches 45, drop that player from
                # the pool used for the remaining lineups.
                if player_dict[players[i]] == 45:
                    data = data[data['Player'] != players[i]]
    return lineups
# NOTE(review): get_optimals is declared as (site, data, num_lineups,
# optimize_on); this call passes the dataframe first and no site argument.
lineups = get_optimals(dfs_proj, 20, 'FFPts')
I have tried reinstalling all the libraries that are used in the script and still get the same issue. Even running it in a normal Python script gives me the same error message. I think this might have to do with memory, but I'm not sure how to check for that or adjust for that, either.
Thanks in advance for any help!
Solution
You had a handful of typos here... Not sure if/how you got this running.
A couple of issues you had:
- You co-mingled the `df` and `data` variable names inside your function, so who knows what that was pulling in. (One of the hazards of working in a notebook.)
- In several locations where you used `player_vars`, you were not indexing the tuple to get the variable piece. I'd suggest you use `LpVariable.dicts()` for these; it is easier to manage.
- Your function call doesn't account for `site` in the function params.
Other advice:
- Do NOT turn off the messaging. You must check the solver output to see the status. First attempts came back as "infeasible", which is how I discovered the `player_vars` problem. If you do decide to turn off the messages, figure out a way to `assert(status == optimal)` or risk junk results. I think it is doable in `pulp`, I just forgot how. Edit: here's how. This works when using the default CBC solver, after solving (obviously). Other solvers, not sure:

      status = LpStatus[prob.status]
      assert(status == 'Optimal')

- Print out the problem a couple of times to see if it passes the giggle test while building it. If you had done this, you would have seen some of the construction problems.
Anyhow, this is working fine for fake data and handles 1000+ players in a couple seconds for 20 lineups.
Buyer beware: I did not review all of the constraints too closely or the conditional constraint, so you should.
import pandas as pd
from pulp import *
# from pulp import LpMaximize
from random import randint, choice
# Build a random stand-in player pool so the example runs without the CSV.
num_players = 1000
positions = ['RB', 'WR', 'TE', 'DEF', 'QB']
# Each record is (Player, Pos, FFPts, DK ($), count): the player id is the
# loop index, the position is a random pick, FFPts is drawn from 1..100,
# 'DK ($)' from 3000..5000, and count starts at 1.
players = [(i, choice(positions), randint(1,100), randint(3000,5000), 1) for i in range(num_players)]
cols = ['Player', 'Pos', 'FFPts', 'DK ($)', 'count']
dfs_proj = pd.DataFrame.from_records(players, columns = cols)
print(dfs_proj.head())
# Original CSV-based loading, left disabled in favour of the random data above:
# dfs_proj = pd.read_csv("4for4_dfs_projections_120321.csv")
# dfs_proj['count'] = 1
# cols = ['Player', 'Pos', 'FFPts', 'DK ($)', 'count']
# dfs_proj = dfs_proj[cols]
# Keep rows whose 'DK ($)' is at least 4000, plus every DEF and TE row.
dfs_proj = dfs_proj[(dfs_proj['DK ($)'] >= 4000) | (dfs_proj['Pos'] == "DEF") | (dfs_proj['Pos'] == "TE")]
# player_dict = dict(zip(dfs_proj['Player'], dfs_proj['count']))
print(dfs_proj.head())
# create a helper function to return the number of players assigned each position
def get_position_sum(player_vars, df, position):
    """Return a PuLP sum of the variables whose row matches ``position``.

    ``player_vars`` is a sequence of ``(player_name, variable)`` pairs aligned
    row-for-row with ``df``. Each variable is multiplied by the boolean
    ``position in <that row's 'Pos' value>``, so non-matching rows contribute
    a zero-weighted term.
    """
    # One True/False flag per row of df, in row order.
    row_matches = (position in pos for pos in df['Pos'])
    weighted_vars = [
        player_vars[idx][1] * is_match
        for idx, is_match in enumerate(row_matches)
    ]
    return pulp.lpSum(weighted_vars)
#def get_optimals(site, data, num_lineups, optimize_on='FFPts'): # site??? # data vs df ???
def get_optimals(data, num_lineups, optimize_on='FFPts', *, verbose=True, exposure_cap=45):
    """Generate up to ``num_lineups`` distinct lineups, highest FFPts total first.

    Each lineup is the solution of a binary program that picks exactly 9
    players with total 'DK ($)' of at most 50000, exactly one QB and one DEF,
    at least 2 RB, 3 WR and 1 TE, maximising the sum of 'FFPts'. From the
    second lineup on, the FFPts total is capped just below the previous
    optimum so the same solution is not returned again.

    :param pd.DataFrame data: Projections with the columns 'Player', 'Pos',
        'FFPts', 'DK ($)' and 'count'. The caller's dataframe is not modified.
    :param int num_lineups: Maximum number of lineups to generate.
    :param str optimize_on: Only selects the gap enforced between consecutive
        lineups: 0.001 for 'Optimal Frequency', 0.01 for anything else.
        NOTE(review): the objective itself always uses the 'FFPts' column;
        confirm whether it was meant to use this column instead.
    :param bool verbose: When true (the default), print each problem and let
        the CBC solver print its log.
    :param int exposure_cap: A player is dropped from the pool for the
        remaining lineups once their counter (seeded from the 'count' column
        and incremented per appearance) reaches this value.
    :return: List of dicts, one per lineup, with keys 'G1'..'G9' holding the
        selected players and 'Total Points' holding the objective value. The
        list is shorter than ``num_lineups`` if the solver stops finding an
        optimal solution.
    """
    lineups = []
    player_dict = dict(zip(data['Player'], data['count']))
    optimal = None

    for lineup_number in range(1, num_lineups + 1):
        prob = pulp.LpProblem('DK_NFL_weekly', pulp.const.LpMaximize)
        # One binary variable per row, kept as (player_name, variable) so it
        # stays aligned with `data` and compatible with get_position_sum.
        player_vars = [
            (row.Player, pulp.LpVariable(f'P{row.Player}', cat='Binary'))
            for row in data.itertuples()
        ]
        variables = [var for _, var in player_vars]

        # total assigned players constraint
        prob += pulp.lpSum(variables) == 9
        # total salary constraint
        prob += pulp.lpSum(
            salary * var for salary, var in zip(data['DK ($)'], variables)
        ) <= 50000
        # for QB and DST, require 1 of each in the lineup
        prob += get_position_sum(player_vars, data, 'QB') == 1
        prob += get_position_sum(player_vars, data, 'DEF') == 1
        # to account for the FLEX position, we allow additional selections of
        # the 3 FLEX-eligible positions: RB, WR, TE
        prob += get_position_sum(player_vars, data, 'RB') >= 2
        prob += get_position_sum(player_vars, data, 'WR') >= 3
        prob += get_position_sum(player_vars, data, 'TE') >= 1

        total_points = pulp.lpSum(
            points * var for points, var in zip(data['FFPts'], variables)
        )
        if lineup_number > 1:
            # Force a strictly lower total than the previous lineup.
            gap = 0.001 if optimize_on == 'Optimal Frequency' else 0.01
            prob += total_points <= optimal - gap
        # A bare expression (no comparison) sets the objective.
        prob += total_points

        if verbose:
            print(prob)

        status = prob.solve(PULP_CBC_CMD(msg=verbose))
        if status != pulp.const.LpStatusOptimal:
            # Infeasible / unsolved: variable values would be junk, so stop
            # here and return the lineups found so far.
            break
        optimal = prob.objective.value()

        # Solver output is floating point; treat anything above 0.5 as picked.
        chosen = [name for name, var in player_vars if (var.value() or 0) > 0.5]
        lineup = {f'G{slot}': name for slot, name in enumerate(chosen, start=1)}
        lineup['Total Points'] = optimal
        lineups.append(lineup)

        # Count appearances from the selected names directly, so the cap
        # applies whatever type the player identifiers have.
        capped = []
        for name in chosen:
            player_dict[name] += 1
            if player_dict[name] == exposure_cap:
                capped.append(name)
        if capped:
            # Rebinding the local name leaves the caller's dataframe untouched.
            data = data[~data['Player'].isin(capped)]

    return lineups
# Request 10 lineups from the filtered pool and print each resulting dict.
lineups = get_optimals(dfs_proj, 10, 'FFPts')
for lineup in lineups:
    print(lineup)
Answered By - AirSquid
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.