File size: 4,419 Bytes
651b002
 
 
 
 
 
 
b430afc
651b002
b430afc
 
651b002
 
b430afc
 
 
651b002
 
 
b430afc
651b002
 
 
b430afc
 
651b002
b430afc
651b002
 
 
b430afc
 
 
651b002
b430afc
651b002
 
 
 
 
 
 
 
 
 
 
 
 
 
303cbb8
b430afc
651b002
b430afc
651b002
b430afc
 
651b002
 
 
b430afc
651b002
b430afc
651b002
 
 
 
 
 
 
 
 
b430afc
 
 
 
 
 
 
 
651b002
 
b430afc
303cbb8
651b002
 
 
 
 
b430afc
 
651b002
 
b430afc
651b002
b430afc
651b002
303cbb8
651b002
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from tqdm.auto import trange
import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px


def _estimate_review_times(type_sequence, time_sequence, r_time, f_time):
    """Estimate per-review time costs from a review log.

    Args:
        type_sequence: Sequence of review-type codes, one per log entry.
            Only the codes 1 and 2 are used for the estimates
            (presumably 1 = review and 2 = relearn; confirm with the caller).
        time_sequence: Sequence of durations aligned with ``type_sequence``
            (presumably milliseconds, since values are divided by 1000).
        r_time: Fallback cost of a successful recall, used when the log
            contains no type-1 entries.
        f_time: Fallback cost of a lapse, used when the log contains no
            type-2 entries.

    Returns:
        A ``(r_time, f_time)`` tuple. ``r_time`` is the mean duration of a
        type-1 entry; ``f_time`` is the mean total duration of a consecutive
        run ("block") of type-2 entries plus ``r_time``. Both are divided by
        1000 and rounded to one decimal.
    """
    if len(type_sequence) == 0:
        return r_time, f_time

    type_block = {}  # number of consecutive runs per type
    type_count = {}  # number of entries per type
    type_time = {}   # summed duration per type

    last_t = type_sequence[0]
    type_block[last_t] = 1
    type_count[last_t] = 1
    type_time[last_t] = time_sequence[0]
    # Pair every entry with its own duration; slicing both sequences from 1
    # keeps them aligned after the first entry was handled above.
    for t, elapsed in zip(type_sequence[1:], time_sequence[1:]):
        type_count[t] = type_count.get(t, 0) + 1
        type_time[t] = type_time.get(t, 0) + elapsed
        if t != last_t:
            type_block[t] = type_block.get(t, 0) + 1
        last_t = t

    if 1 in type_count:
        r_time = round(type_time[1] / type_count[1] / 1000, 1)

    if 2 in type_count and 2 in type_block:
        f_time = round(type_time[2] / type_block[2] / 1000 + r_time, 1)

    return r_time, f_time


def make_plot(proj_dir, type_sequence, time_sequence, w, difficulty_distribution_padding, progress=gr.Progress(track_tqdm=True)):
    """Plot expected review time against target retention and suggest a retention.

    For each target retention from 0.96 down to 0.68 (step 0.02) and each
    difficulty 1..10, the expected total time is computed by iterating a
    value table over a log-spaced stability grid until the value at the
    initial stability changes by at most 0.1. The retention with the lowest
    expected time is picked per difficulty, and the suggestion is the inner
    product of those optima with ``difficulty_distribution_padding``.

    Side effect: writes ``expected_time.csv`` into ``proj_dir``.

    Args:
        proj_dir: Directory for the CSV output; must support the ``/``
            operator (e.g. ``pathlib.Path``).
        type_sequence: Review-type codes of the review log.
        time_sequence: Durations aligned with ``type_sequence``.
        w: Model weights; indices 0-3 and 6-12 are read here.
        difficulty_distribution_padding: Weight per difficulty 1..10
            (length must match the 10 difficulty levels).
        progress: Gradio progress tracker. It is not referenced in the body;
            presumably declaring it with ``track_tqdm=True`` lets Gradio
            mirror the tqdm bar below.

    Returns:
        A ``(fig, markdown)`` tuple: the Plotly line figure and a Markdown
        string containing the suggested retention.
    """
    base = 1.01          # ratio between neighbouring stability grid points
    index_len = 793      # number of stability grid points
    index_offset = 200   # grid index at which stability == 1
    d_range = 10         # difficulties run from 1 to d_range
    d_offset = 1         # difficulty increase applied after a lapse
    max_time = 200000    # initial "not yet computed" cost

    # Defaults (8 and 25) are used when the log lacks the relevant entries.
    r_time, f_time = _estimate_review_times(type_sequence, time_sequence, r_time=8, f_time=25)

    log_base = np.log(base)

    def stability2index(stability):
        return int(round(np.log(stability) / log_base) + index_offset)

    def init_stability(d):
        # Clamp to the smallest grid stability so the index never goes negative.
        return max(((d - w[2]) / w[3] + 2) * w[1] + w[0], np.power(base, -index_offset))

    def cal_next_recall_stability(s, r, d, response):
        if response == 1:
            return s * (1 + np.exp(w[6]) * (11 - d) * np.power(s, w[7]) * (np.exp((1 - r) * w[8]) - 1))
        else:
            return w[9] * np.power(d, w[10]) * np.power(s, w[11]) * np.exp((1 - r) * w[12])

    stability_list = np.array([np.power(base, i - index_offset) for i in range(index_len)])
    df = pd.DataFrame(columns=["retention", "difficulty", "time"])

    for percentage in trange(96, 66, -2, desc='Time vs Retention plot'):
        recall = percentage / 100
        time_list = np.zeros((d_range, index_len))
        # The last grid column stays 0: it is the terminal state with no
        # further cost.
        time_list[:, :-1] = max_time
        # Highest difficulty first: a lapse moves to a higher (or equal)
        # difficulty row, which must already hold its values.
        for d in range(d_range, 0, -1):
            s0 = init_stability(d)
            s0_index = stability2index(s0)
            diff = max_time
            while diff > 0.1:
                s0_time = time_list[d - 1][s0_index]
                for s_index in range(index_len - 2, -1, -1):
                    stability = stability_list[s_index]
                    interval = max(1, round(stability * np.log(recall) / np.log(0.9)))
                    p_recall = np.power(0.9, interval / stability)
                    recall_s = cal_next_recall_stability(stability, p_recall, d, 1)
                    forget_d = min(d + d_offset, 10)
                    forget_s = cal_next_recall_stability(stability, p_recall, forget_d, 0)
                    recall_s_index = min(stability2index(recall_s), index_len - 1)
                    forget_s_index = min(max(stability2index(forget_s), 0), index_len - 1)
                    recall_time = time_list[d - 1][recall_s_index] + r_time
                    forget_time = time_list[forget_d - 1][forget_s_index] + f_time
                    exp_time = p_recall * recall_time + (1.0 - p_recall) * forget_time
                    if exp_time < time_list[d - 1][s_index]:
                        time_list[d - 1][s_index] = exp_time
                diff = s0_time - time_list[d - 1][s0_index]
            # Rows are appended in order, so len(df) is the next free label.
            df.loc[len(df)] = [recall, d, s0_time]

    df.sort_values(by=["difficulty", "retention"], inplace=True)
    df.to_csv(proj_dir/"expected_time.csv", index=False)

    optimal_retention_list = np.zeros(d_range)
    frames = []
    for d in range(1, d_range + 1):
        rows = df[df["difficulty"] == d]
        retention = rows["retention"]
        expected_time = rows["time"]
        optimal_retention = retention.iat[expected_time.argmin()]
        optimal_retention_list[d - 1] = optimal_retention
        frames.append(
            pd.DataFrame({'retention': retention, 'expected time': expected_time, 'd': d, 'r': optimal_retention}))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df2 = pd.concat(frames)

    fig = px.line(df2, x="retention", y="expected time", color='d', log_y=True)

    suggested_retention_markdown = f"""# Suggested Retention: `{np.inner(difficulty_distribution_padding, optimal_retention_list):.2f}`"""
    return fig, suggested_retention_markdown