"""SPADL schema for Opta data."""
|
|
|
from typing import Optional

import pandas as pd
import pandera as pa
from pandera.typing import DateTime, Object, Series

from socceraction.data.schema import (
    CompetitionSchema,
    EventSchema,
    GameSchema,
    PlayerSchema,
    TeamSchema,
)
|
|
|
|
|
class OptaCompetitionSchema(CompetitionSchema):
    """Definition of a dataframe containing a list of competitions and seasons.

    This schema declares no fields of its own; every column comes from
    ``CompetitionSchema``.
    """
|
|
|
|
|
class OptaGameSchema(GameSchema):
    """Definition of a dataframe containing a list of games.

    Extends ``GameSchema`` with Opta-specific match metadata. Every field
    declared here is annotated ``Optional`` and marked ``nullable=True``.
    """

    # NOTE(review): the integer fields below are declared nullable with a
    # plain ``int`` dtype, whereas ``OptaEventSchema.related_player_id`` uses
    # ``pd.Int64Dtype`` for a nullable integer -- confirm which is intended.
    home_score: Optional[Series[int]] = pa.Field(nullable=True)
    """The final score of the home team."""
    away_score: Optional[Series[int]] = pa.Field(nullable=True)
    """The final score of the away team."""
    duration: Optional[Series[int]] = pa.Field(nullable=True)
    """The total duration of the game in minutes."""
    referee: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the referee."""
    venue: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the stadium where the game was played."""
    attendance: Optional[Series[int]] = pa.Field(nullable=True)
    """The number of people who attended the game."""
    home_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the manager of the home team."""
    away_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the manager of the away team."""
|
|
|
|
|
class OptaPlayerSchema(PlayerSchema):
    """Definition of a dataframe containing the list of players of a game.

    Extends ``PlayerSchema`` with a single Opta-specific column.
    """

    starting_position: Series[str]
    """The starting position of the player."""
|
|
|
|
|
class OptaTeamSchema(TeamSchema):
    """Definition of a dataframe containing the list of teams of a game.

    This schema declares no fields of its own; every column comes from
    ``TeamSchema``.
    """
|
|
|
|
|
class OptaEventSchema(EventSchema):
    """Definition of a dataframe containing event stream data of a game.

    Extends ``EventSchema`` with Opta-specific columns. The boolean flags
    (``assist``, ``keypass``, ``goal``, ``shot``, ``touch``) and
    ``related_player_id`` are annotated ``Optional``; the location
    coordinates are marked ``nullable=True``.
    """

    timestamp: Series[DateTime]
    """Time in the match the event takes place, recorded to the millisecond."""
    minute: Series[int]
    """The minutes on the clock at the time of this event."""
    second: Series[int] = pa.Field(ge=0, le=59)
    """The second part of the timestamp."""
    outcome: Series[bool]
    """Whether the event had a successful outcome or not."""
    start_x: Series[float] = pa.Field(nullable=True)
    """The x coordinate of the location where the event started."""
    start_y: Series[float] = pa.Field(nullable=True)
    """The y coordinate of the location where the event started."""
    end_x: Series[float] = pa.Field(nullable=True)
    """The x coordinate of the location where the event ended."""
    end_y: Series[float] = pa.Field(nullable=True)
    """The y coordinate of the location where the event ended."""
    qualifiers: Series[Object]
    """A JSON object containing the Opta qualifiers of the event."""
    assist: Optional[Series[bool]]
    """Whether the event was an assist or not."""
    keypass: Optional[Series[bool]]
    """Whether the event was a keypass or not."""
    goal: Optional[Series[bool]]
    """Whether the event was a goal or not."""
    shot: Optional[Series[bool]]
    """Whether the event was a shot or not."""
    touch: Optional[Series[bool]]
    """Whether the event was an on-the-ball action or not."""
    # Nullable integer: uses the pandas extension dtype rather than plain int.
    related_player_id: Optional[Series[pd.Int64Dtype]] = pa.Field(nullable=True)
    """The ID of a second player that was involved in this event."""
|
|