# cricket-prophet / cricksheet.py
# Instantaneous1's picture
# first commit
# 56f6887
import json, os
import pandas as pd
from tqdm import tqdm
from datetime import datetime
# Directory that get_all_matches scans for ``.json`` match files.
root = "cricsheet/all_json"
# print([json.load(open(os.path.join(root, f)))['meta']['data_version'] for f in os.listdir(root) if json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0'])
# print(set([json.load(open(os.path.join(root, f)))['info']['match_type'] for f in os.listdir(root) if f.endswith('.json') and json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0']))
# Match formats (``info.match_type`` values) found in the data: 'ODI', 'MDM', 'IT20', 'ODM', 'Test', 'T20'
class Inning(object):
    """One innings of a match, held as a ball-by-ball score table.

    Attributes:
        df: Ball-by-ball table; it must have a "run" column, which is summed
            to obtain the final score.
        inning: Innings number as given by the caller (1 means first innings).
        final_score: Sum of the "run" column of ``df``.
        format: Match-format label as given by the caller.
        target: Score to chase; ``None`` until ``settarget`` is called.
        battingteam, bowlingteam, matchid: Filled in by the code that builds
            the innings; ``None`` until then.
    """

    def __init__(self, df, inning, format):
        """Store the table and derive the final score from its "run" column."""
        self.df = df
        self.inning = inning
        self.final_score = df["run"].sum()
        self.format = format
        # Define every attribute up front so that reading one that was never
        # assigned (e.g. ``target`` on a first innings) gives None instead of
        # raising AttributeError.
        self.target = None
        self.battingteam = None
        self.bowlingteam = None
        self.matchid = None

    def settarget(self, target):
        """Record the score this innings is chasing.

        Calling this on a first innings (``inning == 1``) prints a warning,
        but the value is still stored.
        """
        if self.inning == 1:
            print("first innning: don't set target")
        self.target = target
def process_inning(ballbyball, balls_per_over=6):
    """Flatten one innings record into a per-ball run/wicket table.

    Every over contributes at most ``balls_per_over`` rows. Deliveries beyond
    that count within the same over are not given rows of their own: their
    runs and wickets are added onto the over's last row, so each over keeps a
    fixed maximum length.

    Args:
        ballbyball: Mapping describing the innings. Its "overs" entry is a
            list of overs, each with a "deliveries" list; every delivery has
            ``["runs"]["total"]`` and optionally a "wickets" list. An innings
            without an "overs" entry is treated as having no deliveries.
        balls_per_over: Maximum number of rows produced per over (default 6).

    Returns:
        pandas.DataFrame with columns "run" and "wicket", indexed from 1 by
        an index named "balls".

    Raises:
        ValueError: If ``balls_per_over`` is smaller than 1.
    """
    if balls_per_over < 1:
        raise ValueError("balls_per_over must be at least 1")

    score = []
    for over in ballbyball.get("overs", []):
        over_rows = []
        for ball_index, delivery in enumerate(over["deliveries"]):
            runs = delivery["runs"]["total"]
            wickets = len(delivery.get("wickets", []))
            if ball_index < balls_per_over:
                over_rows.append((runs, wickets))
            else:
                # Surplus delivery: fold it into the over's last row.
                last_runs, last_wickets = over_rows.pop()
                over_rows.append((runs + last_runs, wickets + last_wickets))
        score.extend(over_rows)

    df = pd.DataFrame(
        score, columns=["run", "wicket"], index=range(1, len(score) + 1)
    )
    df.index.name = "balls"
    return df
def process_matches(matches, format):
    """Yield the Inning objects of every two-innings match.

    Each element of ``matches`` is a sequence of raw innings records. Matches
    whose length is not exactly two are skipped. For the rest, both innings
    are converted with ``process_inning``, the second innings gets the first
    innings' final score as its target, batting/bowling teams are taken from
    the records' "team" entries, and both innings share a running match id
    that starts at 1.

    Args:
        matches: Iterable of per-match innings sequences.
        format: Match-format label forwarded to every ``Inning``.

    Yields:
        The first innings, then the second innings, of each accepted match.
    """
    print("processing jsons...")
    match_counter = 0
    for innings_records in tqdm(matches):
        if len(innings_records) != 2:
            continue

        built = []
        for number, record in enumerate(innings_records, start=1):
            built.append(Inning(process_inning(record), number, format))
        first, second = built

        second.settarget(first.final_score)

        first_team = innings_records[0]["team"]
        second_team = innings_records[1]["team"]
        first.battingteam = first_team
        second.battingteam = second_team
        # Each side bowls while the other one bats.
        first.bowlingteam = second_team
        second.bowlingteam = first_team

        match_counter += 1
        first.matchid = match_counter
        second.matchid = match_counter

        yield first
        yield second
def get_all_matches(
    format,
    since=1990,
):
    """Load the matching ``.json`` files under ``root`` and return their innings.

    Args:
        format: Text that must occur in a file's ``info.match_type``. This is
            a substring test, so e.g. "T20" also selects "IT20".
        since: Earliest year to keep (inclusive), compared with the year of
            the first entry in ``info.dates`` (parsed as ``YYYY-MM-DD``).

    Returns:
        A list of the Inning objects produced by ``process_matches`` for the
        selected matches.
    """
    matches = []
    print("Loading jsons...")
    # os.listdir gives entries in arbitrary order; sorting keeps the order of
    # the result (and the match ids derived from it) stable between runs.
    for f in tqdm(sorted(os.listdir(root))):
        if not f.endswith(".json"):
            continue
        # Context manager so the handle is closed after each file instead of
        # being left for the garbage collector.
        with open(os.path.join(root, f), encoding="utf-8") as fh:
            obj = json.load(fh)
        info = obj["info"]
        if format not in info["match_type"]:
            continue
        first_day = datetime.strptime(info["dates"][0], "%Y-%m-%d")
        if first_day.year >= since:
            matches.append(obj["innings"])
    return list(process_matches(matches, format))
# get_all_T20s()