File size: 2,805 Bytes
56f6887
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import json, os
import pandas as pd
from tqdm import tqdm
from datetime import datetime

root = "cricsheet/all_json"

# print([json.load(open(os.path.join(root, f)))['meta']['data_version'] for f in os.listdir(root) if json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0'])
# print(set([json.load(open(os.path.join(root, f)))['info']['match_type'] for f in os.listdir(root) if f.endswith('.json') and json.load(open(os.path.join(root, f)))['meta']['data_version']=='1.1.0']))

# formats: 'ODI', 'MDM', 'IT20', 'ODM', 'Test', 'T20'


class Inning(object):
    """One innings: its ball-by-ball table plus match-level metadata.

    Attributes:
        df: the per-ball DataFrame given to the constructor; it must have a
            "run" column.
        inning: innings number as supplied by the caller (1 is treated as the
            first innings by ``settarget``).
        final_score: sum of the "run" column of ``df``.
        format: match-format label supplied by the caller.
        target: value stored by ``settarget``; ``None`` until it is called.
        battingteam, bowlingteam, matchid: ``None`` until the caller assigns
            them.
    """

    def __init__(self, df, inning, format):
        self.df = df
        self.inning = inning
        self.final_score = df["run"].sum()
        self.format = format
        # Declare the attributes that are filled in later so that reading
        # them on any instance (e.g. ``target`` on a first innings) yields
        # None instead of raising AttributeError.
        self.target = None
        self.battingteam = None
        self.bowlingteam = None
        self.matchid = None

    def settarget(self, target):
        """Store ``target`` on this innings.

        A notice is printed when this is called on innings 1, but the value
        is stored regardless.
        """
        if self.inning == 1:
            print("first innning: don't set target")
        self.target = target


def process_inning(ballbyball):
    """Flatten one innings dict into a per-ball DataFrame.

    ``ballbyball["overs"]`` is iterated in order; each over contributes at
    most six rows. Deliveries beyond the sixth in an over are not given their
    own row: their runs and wickets are added onto that over's last row.

    Returns:
        DataFrame with columns "run" and "wicket", indexed from 1 with the
        index named "balls".
    """
    rows = []
    for over in ballbyball["overs"]:
        balls = []
        for position, delivery in enumerate(over["deliveries"]):
            runs = delivery["runs"]["total"]
            # "wickets" is optional on a delivery; absent means none fell.
            wickets = len(delivery.get("wickets", []))
            if position >= 6:
                # Surplus delivery: fold it into the over's final row.
                prev_runs, prev_wickets = balls[-1]
                balls[-1] = (runs + prev_runs, wickets + prev_wickets)
                continue
            balls.append((runs, wickets))
        rows += balls
    df = pd.DataFrame(rows, columns=["run", "wicket"], index=range(1, len(rows) + 1))
    df.index.name = "balls"
    return df


def process_matches(matches, format):
    """Yield a pair of Inning objects for every two-innings match.

    Entries of ``matches`` whose length is not exactly 2 are skipped. For each
    kept match the first innings is yielded, then the second. The second
    innings' target is the first innings' final score, the batting and
    bowling teams are taken from the two innings' "team" fields, and both
    objects share a ``matchid`` that counts kept matches starting at 1.

    Args:
        matches: iterable of per-match innings sequences.
        format: label forwarded unchanged to each Inning.
    """
    print("processing jsons...")
    match_id = 0
    for innings in tqdm(matches):
        if len(innings) != 2:
            continue

        first = Inning(process_inning(innings[0]), 1, format)
        second = Inning(process_inning(innings[1]), 2, format)
        second.settarget(first.final_score)

        # Read both names before assigning anything, as the team fields are
        # swapped between the batting and bowling attributes.
        first_team = innings[0]["team"]
        second_team = innings[1]["team"]
        first.battingteam = first_team
        second.battingteam = second_team
        first.bowlingteam = second_team
        second.bowlingteam = first_team

        match_id += 1
        first.matchid = match_id
        second.matchid = match_id

        yield first
        yield second


def get_all_matches(
    format,
    since=1990,
):
    """Load the matching ``.json`` files under ``root`` and return their innings.

    A file is kept when ``format`` is contained in its ``info.match_type``
    (an ``in`` test, so with string values "T20" also selects "IT20") and the
    year of the first entry in ``info.dates`` is at least ``since``.

    Args:
        format: match-format label to select; also forwarded to
            ``process_matches``.
        since: earliest year to include (inclusive).

    Returns:
        list of the Inning objects produced by ``process_matches``.
    """
    matches = []
    print("Loading jsons...")
    for f in tqdm(os.listdir(root)):
        if not f.endswith(".json"):
            continue
        # Use a context manager so each handle is closed promptly instead of
        # being left to the garbage collector; decode explicitly as UTF-8
        # rather than relying on the platform's default encoding.
        with open(os.path.join(root, f), encoding="utf-8") as fh:
            obj = json.load(fh)
        info = obj["info"]
        if (
            format in info["match_type"]
            and datetime.strptime(info["dates"][0], "%Y-%m-%d").year >= since
        ):
            matches.append(obj["innings"])
    return list(process_matches(matches, format))


# Example usage: get_all_matches("T20")