File size: 2,740 Bytes
fb753d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Generate benchmarking plots from serving results."""
import json
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os

# Directory holding one `<model>_samples.json` file per model.
RESULTS_DIR = '/Users/abhdey/Documents/My LLM/Research & Experiment/serving-results'
# Directory the PNG plots are written to.
OUTPUT_DIR = '/Users/abhdey/Documents/My LLM/Research & Experiment/TinyLLMExperiment/serving-results'
# savefig() does not create missing parent directories, so make sure the
# destination exists before any plot is written.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Parallel sequences: the file stem, legend label and plot color of a model
# share the same index.
models = ['tinystories_10m', 'tinystories_7m', 'tinystories_5m', 'tinystories_2_5m']
labels = ['10M', '7M', '5M', '2.5M']
colors = ['#0969da', '#e5383b', '#2d6a4f', '#9b5de5']

# Maps each legend label to the list of `metrics` entries (one per sample)
# read from that model's samples file.
data = {}
for model, label in zip(models, labels):
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default text encoding, which is not UTF-8 everywhere.
    with open(f'{RESULTS_DIR}/{model}_samples.json', encoding='utf-8') as f:
        samples = json.load(f)
    data[label] = [s['metrics'] for s in samples]

# Plot 1: Tokens vs Coherence
# One scatter series per model: x = tokens generated, y = coherence length.
fig, ax = plt.subplots(figsize=(10, 6))
for series_name, series_color in zip(labels, colors):
    series_metrics = data[series_name]
    x_values = [entry['tokens_generated'] for entry in series_metrics]
    y_values = [entry['coherence_length'] for entry in series_metrics]
    ax.scatter(
        x_values,
        y_values,
        c=series_color,
        alpha=0.6,
        s=30,
        label=series_name,
    )
ax.set_title('Tokens Generated vs Coherence Length (100 samples per model)', fontsize=13)
ax.set_xlabel('Tokens Generated', fontsize=12)
ax.set_ylabel('Coherence Length', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11)
fig.tight_layout()
plot1_path = f'{OUTPUT_DIR}/plot1_tokens_vs_coherence.png'
fig.savefig(plot1_path, dpi=150)
# Close the figure so it does not stay registered with pyplot.
plt.close(fig)
print("Plot 1 saved: tokens vs coherence")

# Plot 2: Perplexity vs Repetition
# One scatter series per model: x = perplexity, y = repetition rate scaled
# by 100 to match the "(%)" axis label.
fig, ax = plt.subplots(figsize=(10, 6))
for series_name, series_color in zip(labels, colors):
    series_metrics = data[series_name]
    perplexities = [entry['perplexity'] for entry in series_metrics]
    repetition_pct = [entry['repetition_rate'] * 100 for entry in series_metrics]
    ax.scatter(
        perplexities,
        repetition_pct,
        c=series_color,
        alpha=0.6,
        s=30,
        label=series_name,
    )
ax.set_title('Perplexity vs Repetition Rate (100 samples per model)', fontsize=13)
ax.set_xlabel('Perplexity', fontsize=12)
ax.set_ylabel('Repetition Rate (%)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11)
fig.tight_layout()
plot2_path = f'{OUTPUT_DIR}/plot2_perplexity_vs_repetition.png'
fig.savefig(plot2_path, dpi=150)
# Close the figure so it does not stay registered with pyplot.
plt.close(fig)
print("Plot 2 saved: perplexity vs repetition")

# Plot 3: Coherence Distribution
# Overlaid histograms of coherence length, one per model.
fig, ax = plt.subplots(figsize=(10, 6))
coherence_by_label = {
    label: [m['coherence_length'] for m in data[label]] for label in labels
}
# Use one value range for every histogram. With `bins=20` alone each call
# derives its bin edges from its own min/max, so the overlaid bars would have
# different widths and their counts would not be directly comparable.
all_coherence = [value for values in coherence_by_label.values() for value in values]
# With no samples at all, fall back to hist()'s default range handling.
shared_range = (min(all_coherence), max(all_coherence)) if all_coherence else None
for label, color in zip(labels, colors):
    ax.hist(
        coherence_by_label[label],
        bins=20,
        range=shared_range,
        alpha=0.5,
        color=color,
        label=label,
        edgecolor='white',
    )
ax.set_xlabel('Coherence Length (tokens)', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title('Coherence Length Distribution (100 samples per model)', fontsize=13)
ax.legend(fontsize=11)
# Horizontal grid lines only, to help read the counts.
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig(f'{OUTPUT_DIR}/plot3_coherence_distribution.png', dpi=150)
plt.close()
print("Plot 3 saved: coherence distribution")

print(f"\nAll plots saved to: {OUTPUT_DIR}")