In [59]:
from google.colab import drive
drive.mount("/content/drive")
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Imports¶
In [60]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix
import warnings
warnings.filterwarnings('ignore')
In [61]:
import os
data_path="/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files"
season_csv_list=os.listdir(data_path)
#file path for each csv
season_files=[os.path.join(data_path,s) for s in season_csv_list]
print(season_files)
['/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2024.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2023.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2022.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2021.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2020.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2019.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2018.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2017.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2016.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2015.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2014.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2013.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2012.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2011.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2010.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2009.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2008.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2007.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2006.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2005.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2004.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2003.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2002.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2001.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2000.csv', '/content/drive/MyDrive/data/premier_league_2000_t_2025/csv_files/2025.csv']
Functions calculating Teams league position based on wins and goals scored¶
In [62]:
def prepare_league_table_stats(df:pd.DataFrame)->pd.DataFrame:
  table:Dict[str,Dict[str,int]]=defaultdict(lambda:{
    "team":0,
    "points":0,
    "goals_for":0,
    "goals_against":0,
    "wins":0,
    "losses":0,
    "draws":0
  })
  for _,row in df.iterrows():
    home,away=row["HomeTeam"],row["AwayTeam"]
    hg,ag=row["FTHG"],row["FTAG"]
    table[home]["team"]=home
    table[away]["team"]=away
    table[home]["goals_for"]+=hg
    table[home]["goals_against"]+=ag
    table[away]["goals_for"]+=ag
    table[away]["goals_against"]+=hg
    if hg>ag:
       table[home]["points"]+=3
       table[home]["wins"]+=1
       table[away]["losses"]+=1
    elif ag>hg:
       table[away]["points"]+=3
       table[away]["wins"]+=1
       table[home]["losses"]+=1
    else:
      table[home]["points"]+=1
      table[away]["points"]+=1
      table[home]["draws"]+=1
      table[away]["draws"]+=1
  table_df=pd.DataFrame(table).T
  table_df["goal_diff"]=table_df["goals_for"]-table_df["goals_against"]
  table_df.sort_values(["points","wins","goals_for"],ascending=[False,False,False],inplace=True)
  table_df=table_df.reset_index(drop=True)
  table_df["position"]=table_df.index+1
  return table_df
In [63]:
def add_league_stats_to_matches(df:pd.DataFrame)->pd.DataFrame:
  """
  Parameter:Season matches stats
  Function:Matches data frame will be used to compute league table stats for each teams.The stats will be merged to corresponding team matches in h2h df
  Return:Matches stats with teams league positions,points,goals difference for both home and away team
  """
  #preparing league positions stats
  table_df=prepare_league_table_stats(df)
  #merging league stats on h2h df
  df = df.drop(columns=[col for col in df.columns if col.startswith(('h_', 'a_'))], errors='ignore')
  merged_df = df.merge(
      table_df.add_prefix("h_"),
      how='left',
      left_on='HomeTeam',
      right_on='h_team'
  ).merge(
      table_df.add_prefix("a_"),
      how='left',
      left_on='AwayTeam',
      right_on='a_team'
  )
  return merged_df
Utility functions¶
In [64]:
def parse_date_df(df: pd.DataFrame) -> pd.DataFrame:
    from pandas.api.types import is_datetime64_any_dtype
    if not is_datetime64_any_dtype(df['Date']):
        try:
            df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
        except Exception:
            # Fallback format
            df['Date'] = pd.to_datetime(df['Date'], format="%d/%m/%y", errors='coerce')
    # Optional: drop rows with invalid dates
    df = df.dropna(subset=['Date'])
    return df
In [65]:
def get_season(df: pd.DataFrame) -> pd.DataFrame:
    """
    Assign Premier League season based on date.
    Season starts in August and ends next year.
    """
    years = df['Date'].dt.year
    df['season'] = (years - (df['Date'].dt.month < 8)).astype(str) + '-' + (years - (df['Date'].dt.month < 8) + 1).astype(str)
    return df
In [66]:
def get_last5_stats(df:pd.DataFrame,team:str):
   team_matches5=df[(df["HomeTeam"]==team)|(df["AwayTeam"]==team)].sort_values(['Date'],ascending=False).head()
   avg_goals_for_last5=(  (team_matches5['HomeTeam']==team) * team_matches5['FTHG']   +
               (team_matches5['AwayTeam']==team) * team_matches5['FTAG'] ).mean()
   avg_goals_against_last5=((team_matches5['HomeTeam']==team) * team_matches5['FTAG']+
                   (team_matches5['AwayTeam']==team) * team_matches5['FTHG']).mean()
   avg_corners_last5=((team_matches5['HomeTeam']==team)* team_matches5['HC']+
                (team_matches5['AwayTeam']==team)* team_matches5['AC']).mean()
   avg_redcards_last5=((team_matches5['HomeTeam']==team) * team_matches5['HR']+
                 (team_matches5['AwayTeam']==team) * team_matches5['AR']).mean()
   # win rate in last 5 matches
   wins=((team_matches5['HomeTeam']==team) & (team_matches5['FTR']=='H') |
         (team_matches5['AwayTeam']==team)& (team_matches5['FTR']=='A')).sum()
   win_rate=wins/len(team_matches5)
   draws=((team_matches5["HomeTeam"]==team) & (team_matches5['FTR']=='D') |
          (team_matches5["AwayTeam"]==team) & (team_matches5['FTR']=='D')).sum()
   draw_rate=draws/len(team_matches5)
   return{
       "avg_goals_for_last5":avg_goals_for_last5,
       "avg_goals_against_last5":avg_goals_against_last5,
       "avg_corners_last5":avg_corners_last5,
       "avg_redcards_last5":avg_redcards_last5,
       "win_rate_last5":win_rate,
       "draw_rate_last5":draw_rate
   }
In [67]:
def add_last5_features(fixtures:pd.DataFrame)->pd.DataFrame:
    features=[]
    for _,row in fixtures.iterrows():
       home_stats_last5=get_last5_stats(fixtures,row['HomeTeam'])
       away_stats_last5=get_last5_stats(fixtures,row['AwayTeam'])
       features.append({
           **row,
           **{f"h_{k}": v for k,v in home_stats_last5.items()},
           **{f"a_{k}":v for k, v in away_stats_last5.items()},
       })
    return pd.DataFrame(features)
In [68]:
def calculate_h2h_win_rates(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate head-to-head win rates for each match."""
    h2h: Dict = {}
    home_rates, away_rates,draw_rates = [], [], []
    for _, row in df.iterrows():
        home, away = row["HomeTeam"], row["AwayTeam"]
        key = tuple(sorted([home, away]))
        stats = h2h.get(key, {"team1": 0, "team2": 0,"draws":0, "matches": 0})
        total = stats["matches"]
        # Compute current H2H rates
        if total == 0:
            home_rate = away_rate = 0.5
            draw_rate=0.33
        else:
            if home == key[0]:
                home_rate = stats["team1"] / total
                away_rate = stats["team2"] / total
            else:
                home_rate = stats["team2"] / total
                away_rate = stats["team1"] / total
            draw_rate=stats["draws"]/total
        home_rates.append(home_rate)
        away_rates.append(away_rate)
        draw_rates.append(draw_rate)
        # Update stats with current match result
        stats["matches"]+=1
        if row["FTR"]=='H':
           stats["team1" if home==key[0] else "team2"]+=1
        elif row["FTR"]=='A':
           stats["team2" if home==key[0] else "team1"]+=1
        else:
          stats["draws"]+=1
        h2h[key] = stats
    # Assign to dataframe
    df["home_vs_away_winrate"] = home_rates
    df["away_vs_home_winrate"] = away_rates
    df["h2h_draw_rate"]=draw_rates
    return df
Preparing Training dataset¶
In [69]:
#preparing dataset
season_fixtures:Dict[str,pd.DataFrame]={}
for file_path in season_files:
    raw = pd.read_csv(file_path, encoding='ISO-8859-1', on_bad_lines='skip')
    # parsing date and sorting them in desending order
    raw=parse_date_df(raw)
    raw=raw.sort_values(["Date"],ascending=False).reset_index(drop=True)
    raw=get_season(raw)
    for season_name, group in raw.groupby('season'):
        season_fixtures[season_name] = group
        group=add_league_stats_to_matches(group)
        season_fixtures[season_name] = add_last5_features(group)
In [70]:
season_df = pd.concat(season_fixtures.values(), ignore_index=True)
# Shot efficiency
h_shot_eff = season_df["HST"].div(season_df["HS"]).fillna(0)
a_shot_eff = season_df["AST"].div(season_df["AS"]).fillna(0)
season_df["home_shot_efficiency"] = h_shot_eff
season_df["away_shot_efficiency"] = a_shot_eff
# Gap features
season_df["goal_diff_gap"] = season_df["h_goal_diff"] - season_df["a_goal_diff"]
season_df["points_gap"] = season_df["h_points"] - season_df["a_points"]
season_df["form_gap"] = season_df["HomeTeam_Form"] - season_df["AwayTeam_Form"]
season_df["win_rate_gap"] = season_df["h_win_rate_last5"] - season_df["a_win_rate_last5"]
season_df["attack_strength_gap"] = season_df["h_avg_goals_for_last5"] - season_df["a_avg_goals_for_last5"]
season_df["defense_strength_gap"] = season_df["a_avg_goals_against_last5"] - season_df["h_avg_goals_against_last5"]
season_df["league_position_gap"] = season_df["h_position"] - season_df["a_position"]
# Head-to-head win rates
season_df = calculate_h2h_win_rates(season_df)
# =====feature engineering from odd scores of B365 agency====
season_df["home_away_win_ratio"]=season_df["B365H"]/season_df["B365A"]
season_df["draw_odd_ratio"]=season_df["B365D"]/season_df[["B365H","B365A"]].min(axis=1)
# calculating implied probablity of bet odd
season_df["h_implied_prob"]=1/season_df["B365H"]
season_df["a_implied_prob"]=1/season_df["B365D"]
season_df["d_implied_prob"]=1/season_df["B365A"]
season_df["betting_confidence"]=season_df["h_implied_prob"]-season_df["a_implied_prob"]
"""
favorite strength-> Strong team probabilty to win
underdog strength-> Weak team probabilty to win
"""
season_df["favorite_strength"]=season_df[["h_implied_prob","a_implied_prob"]].min(axis=1)
season_df["underdog_strength"]=season_df[["h_implied_prob","a_implied_prob"]].max(axis=1)
#calculating odd variance from different book markers
home_odds_cols = [
    "B365H", "BWH", "BFH", "PSH", "WHH", "1XBH", "MaxH", "AvgH", "BFEH"
]
away_odds_cols = [
    "B365A", "BWA", "BFA", "PSA", "WHA", "1XBA", "MaxA", "AvgA", "BFEA"
]
#==========last 5 matches ===================
season_df['home_draw_rate_last5'] = season_df.groupby('HomeTeam')['FTR'].apply(
    lambda x: (x.shift()=='D').rolling(5, min_periods=1).mean()
).reset_index(level=0, drop=True)
season_df['away_draw_rate_last5'] = season_df.groupby('AwayTeam')['FTR'].apply(
    lambda x: (x.shift()=='D').rolling(5, min_periods=1).mean()
).reset_index(level=0, drop=True)
season_df['draw_rate_gap'] = season_df['home_draw_rate_last5'] - season_df['away_draw_rate_last5']
season_df['home_low_score_rate'] = season_df.groupby('HomeTeam')['FTHG'].apply(lambda x: (x.shift()<=1).rolling(5,min_periods=1).mean()).reset_index(level=0, drop=True)
season_df['away_low_score_rate'] = season_df.groupby('AwayTeam')['FTAG'].apply(lambda x: (x.shift()<=1).rolling(5,min_periods=1).mean()).reset_index(level=0, drop=True)
season_df['low_score_gap'] = season_df['home_low_score_rate'] - season_df['away_low_score_rate']
season_df["home_odd_variance"]=season_df[home_odds_cols].var(axis=1)
season_df["away_odd_variance"]=season_df[away_odds_cols].var(axis=1)
#=============================================================
season_df.sort_values(["season", "Date"], ascending=False, inplace=True)
# Updated selected columns
selected_cols = [
    "HomeTeam", "AwayTeam",
    "FTHG", "FTAG",
    # Head to head win rates
    "home_vs_away_winrate",
    "away_vs_home_winrate",
    # Gap features
    "goal_diff_gap",
    "points_gap",
    "form_gap",
    "win_rate_gap",
    "attack_strength_gap",
    "defense_strength_gap",
    "league_position_gap",
    # Match performance stats
    "home_shot_efficiency",
    "away_shot_efficiency",
    # Home advantage
    "HomeTeam_Form",
    "AwayTeam_Form",
    "home_away_win_ratio",
    "draw_odd_ratio",
    "h_implied_prob",
    "a_implied_prob",
    "d_implied_prob",
    "home_odd_variance",
    "away_odd_variance",
    "betting_confidence",
    "favorite_strength",
    "underdog_strength",
"home_draw_rate_last5",
    "away_draw_rate_last5",
"draw_rate_gap",
"home_low_score_rate",
    "away_low_score_rate",
  "low_score_gap",
    "FTR"
]
store_df = season_df.copy()
store_df.replace([-np.inf,np.inf],np.nan,inplace=True)
store_df.fillna(0.5,inplace=True)
season_df = season_df[selected_cols]
season_df.reset_index(drop=True, inplace=True)
# Drop rows with missing critical data
season_df.dropna(subset=["HomeTeam", "AwayTeam", "FTHG", "FTAG", "FTR"], inplace=True)
season_df.head()
Out[70]:
| HomeTeam | AwayTeam | FTHG | FTAG | home_vs_away_winrate | away_vs_home_winrate | goal_diff_gap | points_gap | form_gap | win_rate_gap | ... | betting_confidence | favorite_strength | underdog_strength | home_draw_rate_last5 | away_draw_rate_last5 | draw_rate_gap | home_low_score_rate | away_low_score_rate | low_score_gap | FTR | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Brentford | Man City | 0.0 | 1.0 | 0.250000 | 0.625000 | -12.0 | -6 | 0.0 | -0.4 | ... | -0.011696 | 0.210526 | 0.222222 | 0.2 | 0.2 | 0.0 | 0.6 | 0.4 | 0.2 | A | 
| 1 | Wolves | Brighton | 1.0 | 1.0 | 0.142857 | 0.428571 | -9.0 | -7 | 0.0 | -0.4 | ... | 0.007508 | 0.270270 | 0.277778 | 0.2 | 0.2 | 0.0 | 0.8 | 0.8 | 0.0 | D | 
| 2 | Newcastle | Nott'm Forest | 2.0 | 0.0 | 0.833333 | 0.166667 | 8.0 | 4 | 0.0 | 0.4 | ... | 0.398847 | 0.238095 | 0.636943 | 0.2 | 0.2 | 0.0 | 0.6 | 1.0 | -0.4 | H | 
| 3 | Everton | Crystal Palace | 2.0 | 1.0 | 0.500000 | 0.115385 | -2.0 | -1 | 0.0 | -0.2 | ... | 0.100471 | 0.307692 | 0.408163 | 0.4 | 0.4 | 0.0 | 0.6 | 1.0 | -0.4 | H | 
| 4 | Aston Villa | Burnley | 2.0 | 1.0 | 0.416667 | 0.166667 | 7.0 | 5 | -0.4 | 0.4 | ... | 0.375000 | 0.250000 | 0.625000 | 0.4 | 0.0 | 0.4 | 0.4 | 0.8 | -0.4 | H | 
5 rows × 34 columns
EDA¶
In [90]:
numeric_cols=season_df.select_dtypes(include=["number"])
plt.figure(figsize=(20,11))
sns.heatmap(numeric_cols.corr(),annot=True,cmap='coolwarm')
Out[90]:
<Axes: >
Distribution of match outcome¶
In [72]:
sns.countplot(x=season_df['FTR'])
Out[72]:
<Axes: xlabel='FTR', ylabel='count'>
Dataset is biased on home wins
Feature Selection¶
In [73]:
# selecting features for Training and Test sets
features = [
    # head to head win rates
    "home_vs_away_winrate",
    "away_vs_home_winrate",
    # Match performance stats
    "home_shot_efficiency",
    "away_shot_efficiency",
    "goal_diff_gap",
    "points_gap",
    "form_gap",
    "win_rate_gap",
    "attack_strength_gap",
    "defense_strength_gap",
    "league_position_gap",
    "home_away_win_ratio",
    "draw_odd_ratio",
    "h_implied_prob",
    "a_implied_prob",
    "d_implied_prob",
    "home_odd_variance",
    "away_odd_variance",
    "betting_confidence",
    "favorite_strength",
    "underdog_strength",
  "home_draw_rate_last5",
  "away_draw_rate_last5",
  "draw_rate_gap",
  "home_low_score_rate",
  "away_low_score_rate",
  "low_score_gap"
]
target='FTR'
X=season_df[features]
X = X[features].fillna(0)
y=season_df[target]
outcome_to_int = {
    "H": 0,
    "D": 1,
    "A": 2
}
y=y.map(outcome_to_int)
scaler=StandardScaler()
X=X.replace([np.inf,-np.inf],np.nan)
X=X.fillna(0.5)
#scaling X
X_scaled=scaler.fit_transform(X)
# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42,
                                                    stratify=y)
Random Forest Classifier¶
In [74]:
rf=RandomForestClassifier(    n_estimators=300,          # more trees
    max_depth=12,              # limit tree depth
    min_samples_split=5,       # prevent splits with too few samples
    min_samples_leaf=2,        # leaf must have at least 2 samples
    max_features='sqrt',       # sqrt(num_features) for split
    class_weight="balanced",   # handle class imbalance
    random_state=42)
rf.fit(X_train,y_train)
y_pred=rf.predict(X_test)
# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
Accuracy: 0.5922995780590717
              precision    recall  f1-score   support
           0       0.69      0.74      0.71       869
           1       0.36      0.26      0.30       467
           2       0.58      0.64      0.61       560
    accuracy                           0.59      1896
   macro avg       0.54      0.55      0.54      1896
weighted avg       0.57      0.59      0.58      1896
In [75]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
cm = confusion_matrix(y_test, y_pred)
cm
Out[75]:
array([[644, 123, 102],
       [188, 121, 158],
       [107,  95, 358]])
In [76]:
import numpy as np
classes = ["Home", "Draw", "Away"]
def predict_match_rf(df, model, home_team, away_team, features,alpha=0.7):
    """
    Predict the outcome of a single match using a trained Random Forest.
    Parameters:
        df (pd.DataFrame): Dataset with match features.
        model: Trained RandomForestClassifier.
        home_team (str): Name of the home team.
        away_team (str): Name of the away team.
        features (list): List of feature column names.
        draw_threshold (float): Minimum probability to predict Draw.
    Returns:
        predicted_class (int or None): 0=Home, 1=Draw, 2=Away
        predicted_probs (np.array or None): probability array [Home, Draw, Away]
    """
    # Select the match row
    match_row = df[(df['HomeTeam'] == home_team) & (df['AwayTeam'] == away_team)]
    if match_row.empty:
        return None, None
    X_match = match_row[features]
    # Predict probabilities
    y_pred_proba = model.predict_proba(X_match)[0]
    implied_prob_home=match_row["h_implied_prob"].values[0]
    implied_prob_draw=match_row["d_implied_prob"].values[0]
    implied_prob_away=match_row["a_implied_prob"].values[0]
    final_home_prob=y_pred_proba[0]*alpha+(1-alpha)*implied_prob_home
    final_draw_prob=y_pred_proba[1]*alpha+(1-alpha)*implied_prob_draw
    final_away_prob=y_pred_proba[2]*alpha+(1-alpha)*implied_prob_away
    total=final_home_prob+final_draw_prob+final_away_prob
    final_y_proba=np.array([final_home_prob,final_draw_prob,final_away_prob])/total
    return  final_y_proba
# ==================================
# Example usage: loop through fixtures
# ==================================
fixtures = [
    ("Nott'm Forest", "Chelsea"),
    ("Brighton", "Newcastle"),
    ("Burnley", "Leeds"),
    ("Crystal Palace", "Bournemouth"),
    ("Man City", "Everton"),
    ("Sunderland", "Wolves"),
    ("Fulham", "Arsenal"),
    ("Tottenham", "Aston Villa"),
    ("Liverpool", "Man United"),
    ("West Ham", "Brentford"),
]
for home_team, away_team in fixtures:
     pred_probs = predict_match_rf(store_df, rf, home_team, away_team, features)
     pred_class=pred_probs.argmax()
     if pred_class is not None:
        print(f"{home_team} vs {away_team}")
        print("   Home ", f"{pred_probs[0]*100:.2f}%")
        print("   Draw ", f"{pred_probs[1]*100:.2f}%")
        print("   Away ", f"{pred_probs[2]*100:.2f}%")
        print("-" * 40)
Nott'm Forest vs Chelsea Home 20.28% Draw 34.20% Away 45.52% ---------------------------------------- Brighton vs Newcastle Home 24.35% Draw 53.20% Away 22.45% ---------------------------------------- Burnley vs Leeds Home 27.63% Draw 54.39% Away 17.98% ---------------------------------------- Crystal Palace vs Bournemouth Home 23.42% Draw 53.72% Away 22.86% ---------------------------------------- Man City vs Everton Home 45.25% Draw 40.90% Away 13.85% ---------------------------------------- Sunderland vs Wolves Home 42.66% Draw 42.12% Away 15.22% ---------------------------------------- Fulham vs Arsenal Home 16.70% Draw 62.32% Away 20.98% ---------------------------------------- Tottenham vs Aston Villa Home 36.39% Draw 31.74% Away 31.87% ---------------------------------------- Liverpool vs Man United Home 55.57% Draw 35.36% Away 9.07% ---------------------------------------- West Ham vs Brentford Home 21.15% Draw 30.79% Away 48.06% ----------------------------------------
Neural network¶
In [77]:
from tensorflow import keras
from tensorflow.keras import layers
model=keras.Sequential([
    layers.Dense(128,activation='relu',input_shape=(X_train.shape[1],)),
    layers.Dropout(0.3),
    layers.Dense(64,activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(32,activation='relu'),
    layers.Dense(3,activation='softmax')
])
model.compile(optimizer='adam',metrics=['accuracy'],loss='sparse_categorical_crossentropy')
In [78]:
from sklearn.utils.class_weight import compute_class_weight
class_weights=compute_class_weight('balanced',classes=np.unique(y_train),y=y_train)
class_weights=dict(enumerate(class_weights))
history=model.fit(X_train,y_train,
                  batch_size=32,epochs=60,
                  validation_split=0.2,class_weight=class_weights)
Epoch 1/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.4141 - loss: 1.6111 - val_accuracy: 0.5201 - val_loss: 0.9689 Epoch 2/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4887 - loss: 1.0220 - val_accuracy: 0.5188 - val_loss: 0.9921 Epoch 3/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.4925 - loss: 1.0147 - val_accuracy: 0.5122 - val_loss: 1.0036 Epoch 4/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.5131 - loss: 1.0021 - val_accuracy: 0.5069 - val_loss: 0.9963 Epoch 5/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5309 - loss: 0.9876 - val_accuracy: 0.4819 - val_loss: 1.0162 Epoch 6/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5128 - loss: 0.9928 - val_accuracy: 0.5049 - val_loss: 0.9804 Epoch 7/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5139 - loss: 0.9967 - val_accuracy: 0.5016 - val_loss: 0.9896 Epoch 8/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.5216 - loss: 0.9859 - val_accuracy: 0.4957 - val_loss: 1.0072 Epoch 9/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5194 - loss: 0.9877 - val_accuracy: 0.5129 - val_loss: 0.9728 Epoch 10/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5100 - loss: 0.9995 - val_accuracy: 0.5115 - val_loss: 0.9666 Epoch 11/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5164 - loss: 0.9972 - val_accuracy: 0.5201 - val_loss: 0.9587 Epoch 12/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5232 - loss: 0.9890 - val_accuracy: 0.5082 - val_loss: 0.9907 Epoch 13/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5152 - loss: 0.9866 - val_accuracy: 0.5221 - val_loss: 0.9571 Epoch 14/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5302 - loss: 0.9760 - val_accuracy: 0.5221 - val_loss: 0.9719 Epoch 15/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5322 - loss: 0.9743 - val_accuracy: 0.5115 - val_loss: 0.9797 Epoch 16/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5221 - loss: 0.9716 - val_accuracy: 0.5096 - val_loss: 0.9716 Epoch 17/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5240 - loss: 0.9743 - val_accuracy: 0.5115 - val_loss: 0.9778 Epoch 18/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5289 - loss: 0.9737 - val_accuracy: 0.5142 - val_loss: 0.9725 Epoch 19/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5405 - loss: 0.9687 - val_accuracy: 0.5274 - val_loss: 0.9704 Epoch 20/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5321 - loss: 0.9807 - val_accuracy: 0.5214 - val_loss: 0.9836 Epoch 21/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5345 - loss: 0.9760 - val_accuracy: 0.5194 - val_loss: 0.9728 Epoch 22/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5281 - loss: 0.9780 - val_accuracy: 0.5254 - val_loss: 0.9826 Epoch 23/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5439 - loss: 0.9677 - val_accuracy: 0.5379 - val_loss: 0.9744 Epoch 24/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5383 - loss: 0.9714 - val_accuracy: 0.5320 - val_loss: 0.9686 Epoch 25/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5241 - loss: 0.9718 - val_accuracy: 0.5339 - val_loss: 0.9635 Epoch 26/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5329 - loss: 0.9708 - val_accuracy: 0.5412 - val_loss: 0.9515 Epoch 27/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - accuracy: 0.5500 - loss: 0.9530 - val_accuracy: 0.5478 - val_loss: 0.9592 Epoch 28/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5733 - loss: 0.9509 - val_accuracy: 0.5419 - val_loss: 0.9571 Epoch 29/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5494 - loss: 0.9562 - val_accuracy: 0.5485 - val_loss: 0.9588 Epoch 30/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5548 - loss: 0.9534 - val_accuracy: 0.5630 - val_loss: 0.9591 Epoch 31/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5587 - loss: 0.9460 - val_accuracy: 0.5623 - val_loss: 0.9357 Epoch 32/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5723 - loss: 0.9398 - val_accuracy: 0.5590 - val_loss: 0.9323 Epoch 33/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5788 - loss: 0.9412 - val_accuracy: 0.5583 - val_loss: 0.9435 Epoch 34/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5719 - loss: 0.9378 - val_accuracy: 0.5577 - val_loss: 0.9499 Epoch 35/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.5663 - loss: 0.9445 - val_accuracy: 0.5498 - val_loss: 0.9454 Epoch 36/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5738 - loss: 0.9407 - val_accuracy: 0.5471 - val_loss: 0.9415 Epoch 37/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5575 - loss: 0.9400 - val_accuracy: 0.5386 - val_loss: 0.9666 Epoch 38/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5685 - loss: 0.9449 - val_accuracy: 0.5557 - val_loss: 0.9381 Epoch 39/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5646 - loss: 0.9546 - val_accuracy: 0.5550 - val_loss: 0.9387 Epoch 40/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5690 - loss: 0.9244 - val_accuracy: 0.5557 - val_loss: 0.9431 Epoch 41/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.5730 - loss: 0.9285 - val_accuracy: 0.5610 - val_loss: 0.9316 Epoch 42/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5670 - loss: 0.9315 - val_accuracy: 0.5544 - val_loss: 0.9326 Epoch 43/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.5634 - loss: 0.9394 - val_accuracy: 0.5452 - val_loss: 0.9539 Epoch 44/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.5650 - loss: 0.9390 - val_accuracy: 0.5557 - val_loss: 0.9283 Epoch 45/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5673 - loss: 0.9315 - val_accuracy: 0.5511 - val_loss: 0.9405 Epoch 46/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5799 - loss: 0.9230 - val_accuracy: 0.5458 - val_loss: 0.9278 Epoch 47/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5753 - loss: 0.9284 - val_accuracy: 0.5557 - val_loss: 0.9325 Epoch 48/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5750 - loss: 0.9282 - val_accuracy: 0.5531 - val_loss: 0.9327 Epoch 49/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5833 - loss: 0.9241 - val_accuracy: 0.5419 - val_loss: 0.9591 Epoch 50/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5711 - loss: 0.9239 - val_accuracy: 0.5498 - val_loss: 0.9307 Epoch 51/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5753 - loss: 0.9244 - val_accuracy: 0.5353 - val_loss: 0.9555 Epoch 52/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5800 - loss: 0.9236 - val_accuracy: 0.5583 - val_loss: 0.9346 Epoch 53/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5714 - loss: 0.9215 - val_accuracy: 0.5577 - val_loss: 0.9521 Epoch 54/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5802 - loss: 0.9168 - val_accuracy: 0.5564 - val_loss: 0.9364 Epoch 55/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5874 - loss: 0.9123 - val_accuracy: 0.5478 - val_loss: 0.9511 Epoch 56/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5665 - loss: 0.9248 - val_accuracy: 0.5491 - val_loss: 0.9346 Epoch 57/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.5839 - loss: 0.9101 - val_accuracy: 0.5465 - val_loss: 0.9399 Epoch 58/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 2s 10ms/step - accuracy: 0.5774 - loss: 0.9132 - val_accuracy: 0.5412 - val_loss: 0.9546 Epoch 59/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 3s 15ms/step - accuracy: 0.5631 - loss: 0.9299 - val_accuracy: 0.5531 - val_loss: 0.9284 Epoch 60/60 190/190 ━━━━━━━━━━━━━━━━━━━━ 2s 12ms/step - accuracy: 0.5844 - loss: 0.9103 - val_accuracy: 0.5485 - val_loss: 0.9310
In [79]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.3f}")
from sklearn.metrics import classification_report
y_pred=model.predict(X_test).argmax(axis=1)
print(classification_report(y_test,y_pred))
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.5548 - loss: 0.9276 Test Accuracy: 0.563 60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step precision recall f1-score support 0 0.71 0.67 0.69 869 1 0.31 0.33 0.32 467 2 0.58 0.60 0.59 560 accuracy 0.56 1896 macro avg 0.53 0.53 0.53 1896 weighted avg 0.57 0.56 0.57 1896
In [80]:
import numpy as np
classes = ["Home", "Draw", "Away"]
def predict_upcoming_match_nn(df, model, home_team, away_team, features, alpha=0.7):
    """
    Predict match outcome by blending neural network predictions with implied odds
    Args:
        df: DataFrame containing match data
        model: Trained neural network model
        home_team: Home team name
        away_team: Away team name
        features: List of feature column names
        alpha: Weight for model predictions (1-alpha for betting odds)
    Returns:
        Tuple of (predicted_class, probability_array)
    """
    match = df[(df["HomeTeam"] == home_team) & (df["AwayTeam"] == away_team)]
    if match.empty:
        print(f"No match found between {home_team} and {away_team}")
        return None, None
    X_match = match[features].values
    # NN predicts probabilities directly
    y_pred_proba = model.predict(X_match)[0]  # shape (3,)
    # Get implied probabilities from betting odds
    implied_home = match['h_implied_prob'].values[0]
    implied_draw = match['a_implied_prob'].values[0]
    implied_away = match['d_implied_prob'].values[0]
    # Blend model probs with odds
    final_home = alpha * y_pred_proba[0] + (1 - alpha) * implied_home
    final_draw = alpha * y_pred_proba[1] + (1 - alpha) * implied_draw
    final_away = alpha * y_pred_proba[2] + (1 - alpha) * implied_away
    # Normalize to sum = 1
    total = final_home + final_draw + final_away
    final_probs = np.array([final_home, final_draw, final_away]) / total
    # Final prediction (0=Home, 1=Draw, 2=Away)
    y_pred = int(np.argmax(final_probs))
    return y_pred, final_probs
# List of upcoming fixtures
fixtures = [
    ("Nott'm Forest", "Chelsea"),
    ("Brighton", "Newcastle"),
    ("Burnley", "Leeds"),
    ("Crystal Palace", "Bournemouth"),
    ("Man City", "Everton"),
    ("Sunderland", "Wolves"),
    ("Fulham", "Arsenal"),
    ("Tottenham", "Aston Villa"),
    ("Liverpool", "Man United"),
    ("West Ham", "Brentford"),
]
# Make predictions for all fixtures
for home_team, away_team in fixtures:
    pred_class, pred_probs = predict_upcoming_match_nn(
        store_df, model, home_team, away_team, features
    )
    if pred_class is None:
        continue
    print(f"{home_team} vs {away_team}")
    print(f"→ Predicted: {pred_class}")
    print(f"   Home: {pred_probs[0]*100:.2f}%")
    print(f"   Draw: {pred_probs[1]*100:.2f}%")
    print(f"   Away: {pred_probs[2]*100:.2f}%")
    print("-" * 40)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step Nott'm Forest vs Chelsea → Predicted: 2 Home: 24.47% Draw: 36.89% Away: 38.64% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step Brighton vs Newcastle → Predicted: 2 Home: 23.35% Draw: 37.42% Away: 39.23% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step Burnley vs Leeds → Predicted: 1 Home: 34.09% Draw: 38.04% Away: 27.87% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step Crystal Palace vs Bournemouth → Predicted: 2 Home: 21.89% Draw: 37.25% Away: 40.86% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step Man City vs Everton → Predicted: 0 Home: 54.30% Draw: 28.49% Away: 17.21% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 40ms/step Sunderland vs Wolves → Predicted: 0 Home: 46.96% Draw: 33.40% Away: 19.64% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 44ms/step Fulham vs Arsenal → Predicted: 0 Home: 35.79% Draw: 34.12% Away: 30.09% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 44ms/step Tottenham vs Aston Villa → Predicted: 0 Home: 36.91% Draw: 27.62% Away: 35.47% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step Liverpool vs Man United → Predicted: 0 Home: 67.55% Draw: 23.07% Away: 9.38% ---------------------------------------- 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step West Ham vs Brentford → Predicted: 1 Home: 27.66% Draw: 37.67% Away: 34.67% ----------------------------------------
Neural Network accuracy progression and loss minimization over epochs¶
In [81]:
cm=confusion_matrix(y_test,y_pred)
sns.heatmap(cm,cmap='coolwarm',annot=True,fmt="d")
plt.xlabel('actual')
plt.ylabel('predicited')
plt.show()
In [82]:
plt.plot(history.history['accuracy'],label='Train acc')
plt.plot(history.history['val_accuracy'],label='Val acc')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()
In [83]:
plt.plot(history.history['loss'],label='Train loss')
plt.plot(history.history['val_loss'],label='Val loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()
Logistic Regression¶
In [84]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression(class_weight='balanced')
lr.fit(X_train,y_train)
Out[84]:
LogisticRegression(class_weight='balanced')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(class_weight='balanced')
In [85]:
y_pred=lr.predict(X_test)
print(classification_report(y_test,y_pred))
              precision    recall  f1-score   support
           0       0.71      0.65      0.68       869
           1       0.32      0.37      0.34       467
           2       0.58      0.59      0.58       560
    accuracy                           0.56      1896
   macro avg       0.54      0.53      0.53      1896
weighted avg       0.58      0.56      0.57      1896
In [86]:
import numpy as np
classes = ["Home", "Draw", "Away"]
def predict_match_lr(df, model, home_team, away_team, features,alpha=0.7):
    """
    Parameters:
        df (pd.DataFrame): Dataset with match features.
        model: Trained RandomForestClassifier.
        home_team (str): Name of the home team.
        away_team (str): Name of the away team.
        features (list): List of feature column names.
        draw_threshold (float): Minimum probability to predict Draw.
    Returns:
        predicted_class (int or None): 0=Home, 1=Draw, 2=Away
        predicted_probs (np.array or None): probability array [Home, Draw, Away]
    """
    # Select the match row
    match_row = df[(df['HomeTeam'] == home_team) & (df['AwayTeam'] == away_team)]
    if match_row.empty:
        return None, None
    X_match = match_row[features]
    # Predict probabilities
    y_pred_proba = model.predict_proba(X_match)[0]
    implied_prob_home=match_row["h_implied_prob"].values[0]
    implied_prob_draw=match_row["d_implied_prob"].values[0]
    implied_prob_away=match_row["a_implied_prob"].values[0]
    final_home_prob=y_pred_proba[0]*alpha+(1-alpha)*implied_prob_home
    final_draw_prob=y_pred_proba[1]*alpha+(1-alpha)*implied_prob_draw
    final_away_prob=y_pred_proba[2]*alpha+(1-alpha)*implied_prob_away
    total=final_home_prob+final_draw_prob+final_away_prob
    final_y_proba=np.array([final_home_prob,final_draw_prob,final_away_prob])/total
    return  final_y_proba
# ==================================
# Example usage: loop through fixtures
# ==================================
fixtures = [
    ("Nott'm Forest", "Chelsea"),
    ("Brighton", "Newcastle"),
    ("Burnley", "Leeds"),
    ("Crystal Palace", "Bournemouth"),
    ("Man City", "Everton"),
    ("Sunderland", "Wolves"),
    ("Fulham", "Arsenal"),
    ("Tottenham", "Aston Villa"),
    ("Liverpool", "Man United"),
    ("West Ham", "Brentford"),
]
for home_team, away_team in fixtures:
    pred_probs = predict_match_lr(store_df, lr, home_team, away_team, features)
    print(f"{home_team} vs {away_team}")
    print("   Home ", f"{pred_probs[0]*100:.2f}%")
    print("   Draw ", f"{pred_probs[1]*100:.2f}%")
    print("   Away ", f"{pred_probs[2]*100:.2f}%")
    predicted_class=pred_probs.argmax()
    print(predicted_class)
    print("-" * 40)
Nott'm Forest vs Chelsea Home 26.73% Draw 41.95% Away 31.31% 1 ---------------------------------------- Brighton vs Newcastle Home 29.06% Draw 41.58% Away 29.36% 1 ---------------------------------------- Burnley vs Leeds Home 35.17% Draw 47.79% Away 17.04% 1 ---------------------------------------- Crystal Palace vs Bournemouth Home 23.25% Draw 46.99% Away 29.76% 1 ---------------------------------------- Man City vs Everton Home 47.19% Draw 25.38% Away 27.43% 0 ---------------------------------------- Sunderland vs Wolves Home 51.99% Draw 27.06% Away 20.95% 0 ---------------------------------------- Fulham vs Arsenal Home 47.98% Draw 38.00% Away 14.02% 0 ---------------------------------------- Tottenham vs Aston Villa Home 29.74% Draw 42.26% Away 28.01% 1 ---------------------------------------- Liverpool vs Man United Home 70.44% Draw 20.54% Away 9.02% 0 ---------------------------------------- West Ham vs Brentford Home 24.65% Draw 41.67% Away 33.68% 1 ----------------------------------------
In [87]:
cm=confusion_matrix(y_test,y_pred)
sns.heatmap(cm,cmap='coolwarm',annot=True,fmt='d')
Out[87]:
<Axes: >