Issue
I'm trying to run the following Python script on Spyder Python 3.7. I am very new to python and this is my first time attempting to run a program this complicated:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import itertools
matches = pd.read_csv( '/Input/international-football-results-from-1872-to-2017/results 3.csv')
matches.head(2)
matches = matches.astype({'date':'datetime64[ns]'})
print = ( "Then I create a dataframe with some statistics by team like the sum, count and mean of score for each team.")
# create two dataframe for the home and away teams
home = matches[['home_team', 'home_score']].rename(columns={'home_team':'team', 'home_score':'score'})
away = matches[['away_team', 'away_score']].rename(columns={'away_team':'team', 'away_score':'score'})
# merge it into one and removes any corrilation between away and home teams
team_score = home.append(away).reset_index(drop=True)
team_score.head(5)
# make an aggregation of the the score column group by the team
country_info = team_score.groupby('team')['score'].agg(['sum','count','mean']).reset_index()
country_info = country_info.rename(columns={'sum':'numb_goals', 'count':'numb_matches', 'mean':'goal_avg'})
del home, away
means = matches[['home_score','away_score']].mean()
means
def weight_from_tournament(tournament):
if 'Cup' in tournament or 'Euro' in tournament or 'Copa del Rey' in tournament or 'UEFA' in tournament or 'Copa Libertadores' in tournament or 'Copa America' in tournament:
return 1;
else :
return 100;
# Weight column based on type of tournament
matches.loc[:,'weight'] = matches['tournament'].apply(weight_from_tournament)
# Mpdify weight column based on the date
matches.loc[:,'weight'] = 1 / ((2019 - matches['date'].dt.year.astype('int64'))*matches['weight'])
# Create model data
matches_model_data = pd.concat([matches[['home_team','away_team','home_score','weight']].rename(
columns={'home_team':'team', 'away_team':'opponent','home_score':'goals'}),
matches[['away_team','home_team','away_score','weight']].rename(
columns={'away_team':'team', 'home_team':'opponent','away_score':'goals'})])
#So here using the statsmodels librairies I create my model with a Poisson regression.
poisson_model = smf.glm(formula="goals ~ team + opponent", data=matches_model_data,
family=sm.families.Poisson(), freq_weights=matches_model_data['weight'].values).fit()
print = ("Now it's time to make a function that can return the result of a game, there is some information that we need to know : ")
def get_proba_match(foot_model, team1, team2, max_goals=10):
# Get the average goal for each team
t1_goals_avg = foot_model.predict(pd.DataFrame.apply(data={'team': team1, 'opponent': team2}, index=[1])).values[0]
t2_goals_avg = foot_model.predict(pd.DataFrame.apply(data={'team': team2, 'opponent': team1}, index=[1])).values[0]
# Get probability of all possible score for each team
team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [t1_goals_avg, t2_goals_avg]]
# Do the product of the 2 vectors to get the matrix of the match
match = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
# Get the proba for each possible outcome
t1_wins = np.sum(np.tril(match, -1))
draw = np.sum(np.diag(match))
t2_wins = np.sum(np.triu(match, 1))
result_proba = [t1_wins, draw, t2_wins]
# Adjust the proba to sum to one
result_proba = np.array(result_proba)/ np.array(result_proba).sum(axis=0,keepdims=1)
team_pred[0] = np.array(team_pred[0])/np.array(team_pred[0]).sum(axis=0,keepdims=1)
team_pred[1] = np.array(team_pred[1])/np.array(team_pred[1]).sum(axis=0,keepdims=1)
return result_proba, [np.array(team_pred[0]), np.array(team_pred[1])]
def get_match_result(foot_model, team1, team2, elimination=False, max_draw=50, max_goals=10):
# Get the proba
proba, score_proba = get_proba_match(foot_model, team1, team2, max_goals)
# Get the result, if it's an elimination game we have to be sure the result is not draw
results = pd.Series([np.random.choice([team1, 'draw', team2], p=proba) for i in range(0,max_draw)]).value_counts()
result = results.index[0] if not elimination or (elimination and results.index[0] != 'draw') else results.index[1]
# If the result is not a draw game then we calculate the score of the winner from 1 to the max_goals
# and the score of the looser from 0 to the score of the winner
if (result != 'draw'):
i_win, i_loose = (0,1) if result == team1 else (1,0)
score_proba[i_win] = score_proba[i_win][1:]/score_proba[i_win][1:].sum(axis=0,keepdims=1)
winner_score = pd.Series([np.random.choice(range(1, max_goals+1), p=score_proba[i_win]) for i in range(0,max_draw)]).value_counts().index[0]
score_proba[i_loose] = score_proba[i_loose][:winner_score]/score_proba[i_loose][:winner_score].sum(axis=0,keepdims=1)
looser_score = pd.Series([np.random.choice(range(0, winner_score), p=score_proba[i_loose]) for i in range(0,max_draw)]).value_counts().index[0]
score = [winner_score, looser_score]
# If it's a draw then we calculate a score and repeat it twice
else:
score = np.repeat(pd.Series([np.random.choice(range(0, max_goals+1), p=score_proba[0]) for i in range(0,max_draw)]).value_counts().index[0],2)
looser = team2 if result == team1 else team1 if result != 'draw' else 'draw'
return result, looser, score
print = ("Now let's try it !")
print(get_match_result(poisson_model, 'Gabon', 'Togo'))
print(get_match_result(poisson_model, 'France', 'Togo', elimination=True))
print(get_match_result(poisson_model, 'Argentina', 'Germany'))
print(get_match_result(poisson_model, 'Brazil', 'Vatican', max_goals=20))
print(get_match_result(poisson_model, 'England', 'Morocco'))
print(get_match_result(poisson_model, 'Iran', 'Japan'))
However, I am getting the following error:
print(get_match_result(poisson_model, 'Gabon', 'Togo'))
Traceback (most recent call last):
File "<ipython-input-72-3287e60bece4>", line 1, in <module>
print(get_match_result(poisson_model, 'Gabon', 'Togo'))
TypeError: 'DataFrame' object is not callable
I have modifyed the script from: https://www.kaggle.com/nathanlauga/which-team-will-win-the-2018-fifa-world-cup
It runs fine if Fork the Notebook and use the Kaggle online console.
Any and all suggestions would be greatly appreciated.
Solution
Your problem here is only on the syntax.
In Python you don't want to express print = ('something')
because that means that you are assigning a value to a variable called print. Instead you have to do print('something')
I hope it helped
Answered By - Guilherme Uzeda
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.