Xianbao QIAN
add docs
1818208
'use client';
import { useState, useEffect } from 'react';
import * as duckdb from '@duckdb/duckdb-wasm';
import Table from './components/Table';
import Modal from './components/Modal';
/**
 * One row of `ancestor_children.parquet` as selected by the model query on this page.
 *
 * NOTE(review): rows come straight from `toArray()` on a DuckDB-Wasm query result,
 * so at runtime the two list columns may be vector-like objects rather than plain
 * arrays (`handleModelChildrenClick` guards for exactly that) — confirm before
 * relying on array methods.
 */
type ModelData = {
  /** Model identifier; the text before the first `/` is used as the organization. */
  ancestor: string;

  /** Size of `direct_children`; nullable, and displayed as 0 when null. */
  direct_children_count: number | null;
  /** Size of `all_children`. */
  all_children_count: number;

  /** Identifiers of the direct children, or null when the column is empty. */
  direct_children: Array<string> | null;
  /** Identifiers of every descendant counted in `all_children_count`. */
  all_children: Array<string>;
};
/**
 * Per-organization aggregate row produced by the `GROUP BY org` query on this page.
 * Every count is cast to INTEGER in SQL, so the three numeric fields are plain numbers.
 */
type OrgData = {
  /** Organization name: the part of `ancestor` before the first `/`. */
  org: string;
  /** Number of distinct ancestors belonging to the organization. */
  family_model_count: number;
  /** Sum of `direct_children_count` over the organization's models. */
  family_direct_children_count: number;
  /** Sum of `all_children_count` over the organization's models. */
  family_all_children_count: number;
};
/**
 * Minimal structural view of an indexable container that is read through
 * `get(index)` instead of bracket access. Used when copying a model's child
 * list into a plain array.
 */
interface VectorLike<T> {
  /** Number of elements that `get` can address. */
  length: number;
  /** Returns the element stored at `index`. */
  get(index: number): T;
}
/**
 * Home page of the Hugging Face Model Derivatives Explorer.
 *
 * Loads `ancestor_children.parquet` in the browser through DuckDB-Wasm, then shows
 * two tabs (models / organizations), each with a text filter and a table, plus two
 * modals: one listing a model's children and one listing an organization's models.
 * Tab, models page, models order and both filter texts are mirrored into the URL
 * query string.
 */
export default function Home() {
// Loaded dataset: every model row and the per-organization aggregates.
const [allModels, setAllModels] = useState<ModelData[]>([]);
const [orgData, setOrgData] = useState<OrgData[]>([]);
// Models tab view state.
const [currentPage, setCurrentPage] = useState(1);
// NOTE(review): setPageSize is not called anywhere in this component, so the page size is effectively fixed at 100.
const [pageSize, setPageSize] = useState(100);
const [filterText, setFilterText] = useState('');
// True until the data-loading effect has finished.
const [isLoading, setIsLoading] = useState(true);
const [orderBy, setOrderBy] = useState<'all_children' | 'direct_children'>('all_children');
const [activeTab, setActiveTab] = useState<'models' | 'orgs'>('models');
// Organizations tab view state.
const [orgCurrentPage, setOrgCurrentPage] = useState(1);
// NOTE(review): setOrgPageSize is likewise never called in this component.
const [orgPageSize, setOrgPageSize] = useState(100);
const [orgOrderBy, setOrgOrderBy] = useState<keyof OrgData>('family_all_children_count');
const [orgFilterText, setOrgFilterText] = useState('');
// Modal state: a non-null selectedModel / selectedOrg means the matching modal is open.
const [selectedModel, setSelectedModel] = useState<ModelData | null>(null);
const [selectedOrg, setSelectedOrg] = useState<string | null>(null);
const [selectedOrgModels, setSelectedOrgModels] = useState<ModelData[]>([]);
const [selectedModelChildren, setSelectedModelChildren] = useState<string[]>([]);
const [selectedModelChildrenType, setSelectedModelChildrenType] = useState<'direct' | 'all'>('all');
// Current page inside each modal's table; both are reset to 1 when the modal closes.
const [modelChildrenPage, setModelChildrenPage] = useState(1);
const [orgModelsPage, setOrgModelsPage] = useState(1);
// Rows per page for the tables rendered inside the modals.
const modalPageSize = 10;
// Restore view state from the query string once, on mount.
// Recognised parameters: `tab` ('orgs'), `page` (models-tab page number),
// `order` ('direct_children'), `filter` and `orgFilter` (filter texts).
// Absent or unrecognised values leave the corresponding initial state untouched.
useEffect(() => {
  const params = new URLSearchParams(window.location.search);

  if (params.get('tab') === 'orgs') {
    setActiveTab('orgs');
  }

  // Only accept a whole page number >= 1; `?page=abc` would otherwise store NaN
  // and `?page=0` / negative values would address a page that does not exist.
  const pageParam = params.get('page');
  if (pageParam !== null) {
    const requestedPage = Number.parseInt(pageParam, 10);
    if (Number.isSafeInteger(requestedPage) && requestedPage >= 1) {
      setCurrentPage(requestedPage);
    }
  }

  // 'all_children' is already the initial state, so there is no fallback branch:
  // re-asserting the default here could overwrite a value restored by an earlier
  // run of this effect.
  if (params.get('order') === 'direct_children') {
    setOrderBy('direct_children');
  }

  const filter = params.get('filter');
  if (filter) {
    setFilterText(filter);
  }

  const orgFilter = params.get('orgFilter');
  if (orgFilter) {
    setOrgFilterText(orgFilter);
  }
}, []);
// Mirror the current view state into the query string (without adding a history
// entry) so the view can be shared or reloaded. Only non-default values are written.
useEffect(() => {
  const params = new URLSearchParams();
  if (activeTab === 'orgs') {
    params.set('tab', 'orgs');
  }
  if (currentPage > 1) {
    params.set('page', currentPage.toString());
  }
  if (orderBy === 'direct_children') {
    params.set('order', 'direct_children');
  }
  if (filterText) {
    params.set('filter', filterText);
  }
  if (orgFilterText) {
    params.set('orgFilter', orgFilterText);
  }

  const { pathname, hash } = window.location;
  const query = params.toString();
  // Omit the '?' entirely when every value is at its default, and keep any
  // '#fragment' that was already part of the URL.
  const newUrl = query ? `${pathname}?${query}${hash}` : `${pathname}${hash}`;
  window.history.replaceState(null, '', newUrl);
}, [activeTab, currentPage, orderBy, filterText, orgFilterText]);
// Load the dataset once on mount: start DuckDB-Wasm in a worker, register the
// Parquet file served next to the page, run the model and organization queries,
// and store the rows in state. Failures are logged and the loading flag is always
// cleared, so the page never stays stuck on "Loading data...".
useEffect(() => {
  // Flipped by the cleanup function so a load that finishes after unmount
  // does not touch state.
  let cancelled = false;

  async function fetchData(): Promise<void> {
    let workerUrl: string | null = null;
    let db: duckdb.AsyncDuckDB | null = null;
    let conn: duckdb.AsyncDuckDBConnection | null = null;

    try {
      // Select a bundle based on browser checks
      const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());
      if (!bundle.mainWorker) {
        throw new Error('Selected DuckDB-Wasm bundle does not provide a worker script');
      }

      // The worker script lives on another origin, so it is loaded through a
      // same-origin blob that simply imports it.
      workerUrl = URL.createObjectURL(
        new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
      );

      // Instantiate the asynchronous version of DuckDB-Wasm
      const worker = new Worker(workerUrl);
      db = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker);
      await db.instantiate(bundle.mainModule, bundle.pthreadWorker);

      // Register the Parquet file using the URL
      await db.registerFileURL(
        'ancestor_children.parquet',
        `${window.location.origin}/ancestor_children.parquet`,
        duckdb.DuckDBDataProtocol.HTTP,
        false
      );

      conn = await db.connect();

      // Per-model rows; counts are cast to INTEGER so they arrive as JS numbers.
      const modelResult = await conn.query(`
        SELECT
          ancestor,
          direct_children,
          all_children,
          CAST(all_children_count AS INTEGER) AS all_children_count,
          CAST(direct_children_count AS INTEGER) AS direct_children_count
        FROM 'ancestor_children.parquet'
      `);
      const models: ModelData[] = modelResult.toArray();

      // Per-organization aggregates; the organization is the part of `ancestor`
      // before the first '/'.
      const orgResult = await conn.query(`
        SELECT
          SPLIT_PART(ancestor, '/', 1) AS org,
          CAST(COUNT(DISTINCT ancestor) AS INTEGER) AS family_model_count,
          CAST(SUM(direct_children_count) AS INTEGER) AS family_direct_children_count,
          CAST(SUM(all_children_count) AS INTEGER) AS family_all_children_count
        FROM 'ancestor_children.parquet'
        GROUP BY org
        ORDER BY family_all_children_count DESC
      `);
      const orgs: OrgData[] = orgResult.toArray();

      if (!cancelled) {
        setAllModels(models);
        setOrgData(orgs);
      }
    } catch (error: unknown) {
      console.error('Failed to load ancestor_children.parquet:', error);
    } finally {
      // Release everything that was acquired, whether or not the load succeeded.
      if (workerUrl !== null) {
        URL.revokeObjectURL(workerUrl);
      }
      try {
        if (conn !== null) {
          await conn.close();
        }
        if (db !== null) {
          await db.terminate();
        }
      } catch (cleanupError: unknown) {
        console.error('Failed to shut down DuckDB-Wasm cleanly:', cleanupError);
      }
      if (!cancelled) {
        setIsLoading(false);
      }
    }
  }

  // fetchData handles its own errors, so the promise is intentionally not awaited.
  void fetchData();

  return () => {
    cancelled = true;
  };
}, []);
// Models tab rows: keep the models whose identifier contains the filter text
// (case-insensitive), then order them by the selected child count, largest first.
const modelNeedle = filterText.toLowerCase();
const filteredModels = allModels.filter(({ ancestor }) =>
  ancestor.toLowerCase().includes(modelNeedle)
);
// `filter` returned a fresh array, so sorting it in place leaves `allModels` untouched.
const sortedModels = filteredModels.sort((left, right) =>
  orderBy === 'all_children'
    ? right.all_children_count - left.all_children_count
    : (right.direct_children_count ?? 0) - (left.direct_children_count ?? 0)
);
/**
 * Activates `tab` and puts the models-tab page and order plus both filter
 * texts back to their defaults.
 */
const handleTabChange = (tab: 'models' | 'orgs') => {
  // Clear both filters first, then return the models view to its defaults.
  setFilterText('');
  setOrgFilterText('');
  setOrderBy('all_children');
  setCurrentPage(1);
  setActiveTab(tab);
};
/**
 * Stores `page` as the current page of the table belonging to `tab`.
 */
const handlePageChange = (page: number, tab: 'models' | 'orgs') => {
  const applyPage = tab === 'models' ? setCurrentPage : setOrgCurrentPage;
  applyPage(page);
};
/**
 * Selects the models-tab sort column and jumps back to the first page.
 * NOTE(review): nothing in this component references this handler; the models
 * table wires an inline callback instead.
 */
const handleOrderByClick = (column: 'all_children' | 'direct_children') => {
  setCurrentPage(1);
  setOrderBy(column);
};
// Organizations tab rows: case-insensitive name filter, then ordering by the
// selected column (name ascending, any count descending).
const orgNeedle = orgFilterText.toLowerCase();
const filteredOrgData = orgData.filter(({ org }) => org.toLowerCase().includes(orgNeedle));
// `filter` returned a fresh array, so sorting it in place leaves `orgData` untouched.
const sortedOrgData = filteredOrgData.sort((left, right) =>
  orgOrderBy === 'org'
    ? left.org.localeCompare(right.org)
    : right[orgOrderBy] - left[orgOrderBy]
);
// Slice out the rows of the current organizations page and count the pages.
const orgPageStart = (orgCurrentPage - 1) * orgPageSize;
const paginatedOrgData = sortedOrgData.slice(orgPageStart, orgPageStart + orgPageSize);
const orgTotalPages = Math.ceil(sortedOrgData.length / orgPageSize);
// Copies a vector-like container (anything read through `get(index)` / `length`)
// into a plain array.
const vectorToArray = (vector: VectorLike<string>): string[] =>
  Array.from({ length: vector.length }, (_, index) => vector.get(index));

/**
 * Opens the children modal for `model`, listing either its direct children or
 * all of its children depending on `type`.
 *
 * The child list is normalised to a plain `string[]`: arrays are used as-is and
 * vector-like values are copied element by element. Anything else is reported
 * with `console.error` and shown as an empty list.
 */
const handleModelChildrenClick = (model: ModelData, type: 'direct' | 'all') => {
  setSelectedModel(model);
  setSelectedModelChildrenType(type);

  // Typed as unknown on purpose: the declared column type is an array, but the
  // value is inspected at runtime because it may be a vector-like object instead.
  const children: unknown =
    (type === 'direct' ? model.direct_children : model.all_children) || [];

  if (Array.isArray(children)) {
    setSelectedModelChildren(children);
  } else if (
    typeof children === 'object' &&
    children !== null &&
    'get' in children &&
    'length' in children
  ) {
    setSelectedModelChildren(vectorToArray(children as VectorLike<string>));
  } else {
    console.error('Unexpected children data structure:', children);
    setSelectedModelChildren([]);
  }
};
/**
 * Opens the organization modal for `org`, listing every loaded model whose
 * identifier has `org` as the segment before the first `/`.
 */
const handleOrgModelsClick = (org: string) => {
  const modelsOfOrg = allModels.filter(({ ancestor }) => {
    const [owner] = ancestor.split('/');
    return owner === org;
  });
  setSelectedOrgModels(modelsOfOrg);
  setSelectedOrg(org);
};
/** Stores the page currently shown in the model-children modal table. */
const handleModelChildrenPageChange = (page: number): void => {
  const nextPage = page;
  setModelChildrenPage(nextPage);
};
/** Stores the page currently shown in the organization-models modal table. */
const handleOrgModelsPageChange = (page: number): void => {
  const nextPage = page;
  setOrgModelsPage(nextPage);
};
// Render: intro and definitions, the tab switcher, the active tab's filter and
// table, and the two detail modals (shown while a model / organization is selected).
return (
  <main className="container mx-auto py-8 bg-white dark:bg-gray-900 text-gray-900 dark:text-white">
    <h1 className="text-4xl font-bold mb-4">Hugging Face Model Derivatives Explorer</h1>
    <p className="mb-8">
      This tool allows you to explore the popularity of Hugging Face models based on their derivatives. It pulls model data from publicly exposed files and calculates the number of direct and indirect children for each model using the <code>base_model</code> tag, up to 10 iterations for all children.
    </p>
    <div className="mb-8">
      <h2 className="text-2xl font-bold mb-2">Definitions</h2>
      <ul className="list-disc list-inside">
        <li>
          <strong>Direct Children:</strong> Models that are directly derived from the selected model, i.e., they have the selected model set as their <code>base_model</code>.
        </li>
        <li>
          <strong>All Children:</strong> All models that are derived from the selected model, either directly or indirectly, up to 10 iterations deep. This includes models that have the selected model as their <code>base_model</code>, as well as models derived from those models, and so on.
        </li>
      </ul>
    </div>
    {/* Tab switcher: real links for shareability, but navigation is handled in-page. */}
    <div className="mb-4 flex space-x-4">
      <a
        href={`?tab=models`}
        onClick={(e) => {
          e.preventDefault();
          handleTabChange('models');
        }}
        className={`px-4 py-2 rounded-md ${
          activeTab === 'models'
            ? 'bg-blue-500 dark:bg-blue-600 text-white'
            : 'bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-200'
        }`}
      >
        Models
      </a>
      <a
        href={`?tab=orgs`}
        onClick={(e) => {
          e.preventDefault();
          handleTabChange('orgs');
        }}
        className={`px-4 py-2 rounded-md ${
          activeTab === 'orgs'
            ? 'bg-blue-500 dark:bg-blue-600 text-white'
            : 'bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-200'
        }`}
      >
        Organizations
      </a>
    </div>
    {activeTab === 'models' ? (
      <>
        <div className="mb-4">
          <input
            type="text"
            placeholder="Filter by model name"
            value={filterText}
            onChange={(e) => {
              setFilterText(e.target.value);
              // A new filter changes the result set, so the old page number may
              // no longer exist; start again from the first page.
              setCurrentPage(1);
            }}
            className="px-4 py-2 border border-gray-300 dark:border-gray-700 rounded-md bg-white dark:bg-gray-800 text-gray-900 dark:text-white"
          />
        </div>
        {isLoading ? (
          <p>Loading data...</p>
        ) : (
          <Table
            data={sortedModels}
            columns={[
              {
                key: 'ancestor',
                label: 'Model',
              },
              {
                key: 'direct_children_count',
                label: 'Direct Children',
                render: (value, row) => (
                  <button
                    className="text-right text-blue-500 hover:underline"
                    onClick={() => handleModelChildrenClick(row, 'direct')}
                  >
                    {value ?? 0}
                  </button>
                ),
              },
              {
                key: 'all_children_count',
                label: 'All Children',
                render: (value, row) => (
                  <button
                    className="text-right text-blue-500 hover:underline"
                    onClick={() => handleModelChildrenClick(row, 'all')}
                  >
                    {value}
                  </button>
                ),
              },
            ]}
            orderBy={orderBy}
            onOrderByChange={(key) => {
              // The sortable columns are keyed '*_count' while the order state uses
              // the bare names, so accept either spelling of the two sort keys.
              const requested = String(key).replace(/_count$/, '');
              if (requested === 'all_children' || requested === 'direct_children') {
                setOrderBy(requested);
                setCurrentPage(1);
              }
            }}
            pageSize={pageSize}
            currentPage={currentPage}
            onPageChange={(page) => handlePageChange(page, 'models')}
          />
        )}
      </>
    ) : (
      <>
        <div className="mb-4">
          <input
            type="text"
            placeholder="Filter by organization name"
            value={orgFilterText}
            onChange={(e) => {
              setOrgFilterText(e.target.value);
              // Same reasoning as the model filter: restart from the first page.
              setOrgCurrentPage(1);
            }}
            className="px-4 py-2 border border-gray-300 dark:border-gray-700 rounded-md bg-white dark:bg-gray-800 text-gray-900 dark:text-white"
          />
        </div>
        {isLoading ? (
          <p>Loading data...</p>
        ) : (
          <Table
            // Pass the full sorted list, like every other Table on this page: the
            // table receives pageSize/currentPage and is expected to slice the
            // rows itself, so handing it an already-sliced page would paginate twice.
            // NOTE(review): confirm against the Table component.
            data={sortedOrgData}
            columns={[
              {
                key: 'org',
                label: 'Organization',
              },
              {
                key: 'family_model_count',
                label: 'Model Count',
                render: (value, row) => (
                  <button
                    className="text-right text-blue-500 hover:underline"
                    onClick={() => handleOrgModelsClick(row.org)}
                  >
                    {value}
                  </button>
                ),
              },
              {
                key: 'family_direct_children_count',
                label: 'Direct Children',
                render: (value) => <span className="text-right">{value}</span>,
              },
              {
                key: 'family_all_children_count',
                label: 'All Children',
                render: (value) => <span className="text-right">{value}</span>,
              },
            ]}
            orderBy={orgOrderBy}
            onOrderByChange={(key) => setOrgOrderBy(key)}
            pageSize={orgPageSize}
            currentPage={orgCurrentPage}
            onPageChange={(page) => handlePageChange(page, 'orgs')}
          />
        )}
      </>
    )}
    {/* Children of the selected model; closing also rewinds the modal's pagination. */}
    {selectedModel && (
      <Modal onClose={() => {
        setSelectedModel(null);
        setModelChildrenPage(1);
      }}>
        <h2 className="text-2xl font-bold mb-4">
          {selectedModelChildrenType === 'direct' ? 'Direct Children' : 'All Children'} of {selectedModel.ancestor}
        </h2>
        {selectedModelChildren.length > 0 ? (
          <Table
            data={selectedModelChildren.map((child, index) => ({ id: index, model: child }))}
            columns={[{ key: 'model', label: 'Model' }]}
            pageSize={modalPageSize}
            currentPage={modelChildrenPage}
            onPageChange={handleModelChildrenPageChange}
          />
        ) : (
          <p>No children found for this model.</p>
        )}
      </Modal>
    )}
    {/* Models of the selected organization; closing also rewinds the modal's pagination. */}
    {selectedOrg && (
      <Modal onClose={() => {
        setSelectedOrg(null);
        setOrgModelsPage(1);
      }}>
        <h2 className="text-2xl font-bold mb-4">Models under {selectedOrg}</h2>
        {selectedOrgModels.length > 0 ? (
          <Table
            data={selectedOrgModels}
            columns={[
              {
                key: 'ancestor',
                label: 'Model',
              },
              {
                key: 'direct_children_count',
                label: 'Direct Children',
                render: (value) => <span className="text-right">{value ?? 0}</span>,
              },
              {
                key: 'all_children_count',
                label: 'All Children',
                render: (value) => <span className="text-right">{value}</span>,
              },
            ]}
            pageSize={modalPageSize}
            currentPage={orgModelsPage}
            onPageChange={handleOrgModelsPageChange}
          />
        ) : (
          <p>No models found for this organization.</p>
        )}
      </Modal>
    )}
  </main>
);
}