File size: 6,666 Bytes
633e5e5
 
5e26aa6
633e5e5
 
 
 
5e26aa6
 
 
633e5e5
 
 
 
 
 
 
 
 
5e26aa6
633e5e5
 
5e26aa6
633e5e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e26aa6
633e5e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e26aa6
 
 
633e5e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
"use client";

import { Inter } from "next/font/google";
import ActivityCalendar from "react-activity-calendar";
import * as duckdb from "@duckdb/duckdb-wasm"
import { useState, useEffect } from "react";
import { Tooltip as MuiTooltip } from '@mui/material';

// Inter typeface limited to the Latin subset; its class name is applied to the page's <main>.
const inter = Inter({
  subsets: ["latin"],
});

/**
 * One day of release activity for a single provider.
 */
interface CustomActivity {
  /** Calendar day formatted as YYYY-MM-DD. */
  date: string;
  /** Number of models counted for this provider on this day. */
  count: number;
  /** Intensity bucket from 0 to 4, derived from the provider's average daily count. */
  level: number;
  /** The per-provider rows that were summed into `count`. */
  details: { provider: string; count: number }[];
}

/** Hub organisation ids that the page tracks. */
type ProviderKey =
  | "mistralai"
  | "meta-llama"
  | "openai"
  | "anthropic"
  | "google";

export default function Home() {
  // Per-provider day series; starts as an empty object and is filled once the query completes.
  const [calendarData, setCalendarData] = useState<Record<ProviderKey, CustomActivity[]>>(
    {} as Record<ProviderKey, CustomActivity[]>
  );
  // Stays true until the calendar data has been stored in state.
  const [isLoading, setIsLoading] = useState(true);

  // Display name and accent colour per provider. Key order matters: it is the
  // iteration order used when building the SQL filter and the calendar series.
  const PROVIDERS_MAP: Record<ProviderKey, { name: string; color: string }> = {
    mistralai: { name: "Mistral AI", color: "#ff7000" },
    "meta-llama": { name: "Meta", color: "#0668E1" },
    openai: { name: "OpenAI", color: "#10A37F" },
    anthropic: { name: "Anthropic", color: "#cc785c" },
    google: { name: "Google", color: "#4285F4" },
  };

  /**
   * Queries the `models` view for the number of models created per day and
   * per provider since the start of the current year.
   *
   * Ids under the 'facebook' organisation are reported as 'meta-llama'.
   *
   * @param conn Open DuckDB connection on which the `models` view exists.
   * @returns One `{ date, provider, count }` object per result row, with
   *          `count` converted to a JavaScript number.
   */
  const getModelData = async (conn: duckdb.AsyncDuckDBConnection) => {
    // Quoted, comma-separated provider ids for the IN (...) list below.
    const quotedProviders = Object.keys(PROVIDERS_MAP)
      .map(provider => `'${provider}'`)
      .join(', ');

    const table = await conn.query(`
      SELECT 
        STRFTIME(DATE_TRUNC('day', CAST(createdAt AS DATE)), '%Y-%m-%d') AS date,
        CASE 
          WHEN SPLIT_PART(id, '/', 1) IN ('meta-llama', 'facebook') THEN 'meta-llama'
          ELSE SPLIT_PART(id, '/', 1)
        END AS provider,
        COUNT(*) AS count
      FROM models
      WHERE CAST(createdAt AS DATE) >= DATE_TRUNC('year', CURRENT_DATE)
      AND (
        SPLIT_PART(id, '/', 1) IN (${quotedProviders})
        OR SPLIT_PART(id, '/', 1) = 'facebook'
      )
      GROUP BY DATE_TRUNC('day', CAST(createdAt AS DATE)), 
        CASE 
          WHEN SPLIT_PART(id, '/', 1) IN ('meta-llama', 'facebook') THEN 'meta-llama'
          ELSE SPLIT_PART(id, '/', 1)
        END
      ORDER BY date
    `);

    // Copy each result row into a plain object, coercing the count with Number().
    const counts: Array<{ date: any; provider: any; count: number }> = [];
    for (const row of table.toArray()) {
      counts.push({
        date: row.date,
        provider: row.provider,
        count: Number(row.count),
      });
    }
    return counts;
  }

  /**
   * Expands the sparse query rows into a dense day-by-day series for every
   * provider in PROVIDERS_MAP, from January 1st of the current year through
   * today, and assigns each day an intensity level.
   *
   * The day range is built and formatted entirely in UTC. Mixing a local-time
   * start date with `toISOString()` (which always reports UTC) shifts every
   * label back by one day in time zones east of UTC.
   * NOTE(review): this assumes the `date` strings in `modelData` are UTC
   * calendar days — confirm against the source dataset.
   *
   * Levels are relative to the provider's own average daily count (zero days
   * included): 0 for no releases, then 1-4 for counts up to 0.5x, 1x, 1.5x
   * and above 1.5x of that average.
   *
   * @param modelData Rows of `{ date: 'YYYY-MM-DD', provider, count }`.
   * @returns A record keyed by provider, each holding one entry per day in
   *          chronological order.
   */
  const generateCalendarData = (
    modelData: ReadonlyArray<{ date: string; provider: string; count: number }>
  ) => {
    type ModelRow = (typeof modelData)[number];
    const providers = Object.keys(PROVIDERS_MAP) as ProviderKey[];

    // Bucket the rows once by "date|provider" so each day lookup is O(1)
    // instead of re-filtering the whole result set for every day and provider.
    const rowsByDayAndProvider = new Map<string, ModelRow[]>();
    for (const row of modelData) {
      const key = `${row.date}|${row.provider}`;
      const bucket = rowsByDayAndProvider.get(key);
      if (bucket) {
        bucket.push(row);
      } else {
        rowsByDayAndProvider.set(key, [row]);
      }
    }

    const data = {} as Record<ProviderKey, CustomActivity[]>;
    for (const provider of providers) {
      data[provider] = [];
    }

    // Walk from Jan 1st (UTC) to the current instant, one UTC day at a time.
    const now = new Date();
    const cursor = new Date(Date.UTC(now.getUTCFullYear(), 0, 1));
    while (cursor.getTime() <= now.getTime()) {
      const dateString = cursor.toISOString().slice(0, 10);

      for (const provider of providers) {
        const dayData = rowsByDayAndProvider.get(`${dateString}|${provider}`) ?? [];
        const count = dayData.reduce((sum, row) => sum + row.count, 0);

        data[provider].push({
          date: dateString,
          count,
          level: 0, // placeholder; filled in below once the average is known
          details: dayData,
        });
      }

      cursor.setUTCDate(cursor.getUTCDate() + 1);
    }

    for (const provider of providers) {
      const days = data[provider];
      const total = days.reduce((sum, day) => sum + day.count, 0);
      // `|| 0` guards against NaN from a 0 / 0 division.
      const avgCount = total / days.length || 0;

      for (const day of days) {
        if (day.count === 0) {
          day.level = 0;
        } else if (day.count <= avgCount * 0.5) {
          day.level = 1;
        } else if (day.count <= avgCount) {
          day.level = 2;
        } else if (day.count <= avgCount * 1.5) {
          day.level = 3;
        } else {
          day.level = 4;
        }
      }
    }

    return data;
  }

  /**
   * Starts an in-browser DuckDB instance from the jsDelivr bundles, exposes the
   * hub-stats models parquet file as the `models` view, runs the release query
   * and stores the resulting calendar series in component state.
   *
   * Failures are logged instead of surfacing as an unhandled promise rejection.
   * The connection, the worker and the temporary blob URL are released whether
   * or not loading succeeds.
   * NOTE(review): on failure `isLoading` stays true, so the page keeps showing
   * "Loading..."; a dedicated error state would be needed to show a message.
   */
  const initDB = async () => {
    // NOTE(review): `@next` is a moving dist-tag, so the worker/wasm version
    // fetched here may differ from the installed package — consider pinning.
    const CDN_BASE = `https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@next`;

    const JSDELIVR_BUNDLES = {
      mvp: {
        mainModule: `${CDN_BASE}/dist/duckdb-mvp.wasm`,
        mainWorker: `${CDN_BASE}/dist/duckdb-browser-mvp.worker.js`,
      },
      eh: {
        mainModule: `${CDN_BASE}/dist/duckdb-eh.wasm`,
        mainWorker: `${CDN_BASE}/dist/duckdb-browser-eh.worker.js`,
      },
    };

    // Tracked outside the try block so the finally clause can release them.
    let db: duckdb.AsyncDuckDB | undefined;
    let connection: duckdb.AsyncDuckDBConnection | undefined;

    try {
      const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

      // The worker script lives on another origin, so it is loaded through a
      // same-origin blob that simply importScripts() the real file.
      const workerUrl = URL.createObjectURL(
        new Blob([`importScripts("${bundle.mainWorker}");`], {
          type: "text/javascript",
        })
      );

      const worker = new Worker(workerUrl);
      const logger = new duckdb.ConsoleLogger();
      const database = new duckdb.AsyncDuckDB(logger, worker);
      db = database;

      try {
        await database.instantiate(bundle.mainModule);
      } finally {
        // The blob URL is only needed while the worker starts up.
        URL.revokeObjectURL(workerUrl);
      }

      const conn = await database.connect();
      connection = conn;

      await conn.query(`
        CREATE VIEW models AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true');
    `);

      const modelData = await getModelData(conn);
      const calendarData = generateCalendarData(modelData);
      setCalendarData(calendarData);
      setIsLoading(false);
    } catch (error: unknown) {
      console.error("Failed to load model release data", error);
    } finally {
      // The database is not used after this point, so close the connection
      // and stop the worker instead of leaving it running for the page lifetime.
      try {
        await connection?.close();
        await db?.terminate();
      } catch (cleanupError: unknown) {
        console.warn("Failed to shut down DuckDB cleanly", cleanupError);
      }
    }
  }

  // Load the data once after the first render. The dependency list is empty
  // on purpose: initDB is recreated every render but must not re-run.
  useEffect(() => {
    // Attach a rejection handler so a failed load is logged rather than
    // becoming an unhandled promise rejection.
    initDB().catch((error: unknown) => {
      console.error("Failed to initialise the model calendar", error);
    });
  }, []);

  // Sum of all daily counts for one provider across the loaded range.
  const totalFor = (provider: ProviderKey) =>
    calendarData[provider].reduce((sum, day) => sum + day.count, 0);

  // One heading plus activity calendar per provider, highest total first.
  // Only called once loading has finished, when calendarData is populated.
  const renderCalendars = () => {
    const ranked = Object.entries(PROVIDERS_MAP).sort(
      ([keyA], [keyB]) => totalFor(keyB as ProviderKey) - totalFor(keyA as ProviderKey)
    );

    return ranked.map(([key, value]) => (
      <div key={key} className="mb-8">
        <h2 className="text-2xl font-bold mb-2">{value.name}</h2>
        <ActivityCalendar
          data={calendarData[key as ProviderKey]}
          theme={{
            // Dark palette built from a fixed base colour and the provider's colour.
            dark: ['#161b22', value.color],
          }}
          colorScheme="dark"
          renderBlock={(block, activity) => (
            <MuiTooltip title={`${activity.count} activities on ${activity.date}`}>
              {block}
            </MuiTooltip>
          )}
        />
      </div>
    ));
  };

  return (
    <main className={`flex flex-col items-center justify-center min-h-screen mx-auto p-24 ${inter.className}`}>
      <h1 className="text-5xl font-bold text-center">Open Source Calendar</h1>
      <p className="text-center mt-2 text-sm">A calendar for open source model releases.</p>
      <div className="mt-16">
        {isLoading ? <p>Loading...</p> : <>{renderCalendars()}</>}
      </div>
    </main>
  );
}