|
|
|
|
|
|
|
|
|
export async function* listChatModelsIterator(params = {}) { |
|
const opts = Object.assign({ maxCandidates: 250, concurrency: 12, hfToken: null, timeoutMs: 10000, maxListing: 5000 }, params || {}); |
|
const { maxCandidates, concurrency, hfToken, timeoutMs, maxListing } = opts; |
|
const MAX_TOTAL_TO_FETCH = Math.min(maxListing, 5000); |
|
const PAGE_SIZE = 1000; |
|
const RETRIES = 3; |
|
const BACKOFF_BASE_MS = 200; |
|
|
|
const inFlight = new Set(); |
|
|
|
|
|
const counters = { configFetch429: 0, configFetch200: 0, configFetchError: 0 }; |
|
const initialConcurrency = Math.max(1, concurrency || 1); |
|
let effectiveConcurrency = initialConcurrency; |
|
let availableTokens = effectiveConcurrency; |
|
const tokenWaiters = []; |
|
|
|
|
|
const recent429s = []; |
|
const RATE_WINDOW_MS = 30_000; |
|
const RATE_THRESHOLD = 10; |
|
const BACKOFF_WINDOW_MS = 30_000; |
|
let rateLimitedUntil = 0; |
|
|
|
function pruneOld429s(now) {
  // Drop 429 timestamps that have fallen outside the sliding rate window.
  const cutoff = now - RATE_WINDOW_MS;
  while (recent429s.length > 0 && recent429s[0] < cutoff) {
    recent429s.shift();
  }
}
|
|
|
function record429() {
  // Record one 429 response. If the count inside the sliding window crosses
  // the threshold, halve the effective concurrency (never below 1) and start
  // a cool-down during which maybeRestoreConcurrency() will not ramp up.
  const timestamp = Date.now();
  recent429s.push(timestamp);
  pruneOld429s(timestamp);
  if (recent429s.length < RATE_THRESHOLD) return;

  const halved = Math.max(1, Math.floor(effectiveConcurrency / 2));
  const removed = effectiveConcurrency - halved;
  if (removed > 0) {
    effectiveConcurrency = halved;
    // Shrink the pool of free tokens to match the new ceiling.
    availableTokens = Math.max(0, availableTokens - removed);
  }
  rateLimitedUntil = timestamp + BACKOFF_WINDOW_MS;
}
|
|
|
function maybeRestoreConcurrency() {
  // Called on a timer: once the cool-down has expired and no 429s remain in
  // the window, grow concurrency back toward its initial value one slot per
  // invocation, waking one parked acquireToken() caller per restored slot.
  const now = Date.now();
  pruneOld429s(now);
  if (now < rateLimitedUntil) return;

  const canGrow = recent429s.length === 0 && effectiveConcurrency < initialConcurrency;
  if (!canGrow) return;

  effectiveConcurrency = Math.min(initialConcurrency, effectiveConcurrency + 1);
  availableTokens = Math.min(availableTokens + 1, effectiveConcurrency);

  // Hand the freed slot to a waiter immediately, if anyone is parked.
  const waiter = tokenWaiters.shift();
  if (waiter) waiter();
}
|
|
|
async function acquireToken() {
  // Fast path: a slot is free, claim it synchronously.
  if (availableTokens > 0) {
    availableTokens -= 1;
    return;
  }
  // Slow path: park until releaseToken()/maybeRestoreConcurrency() wakes us,
  // then claim the slot that was just made available.
  // NOTE(review): between the wake-up callback and this decrement, another
  // caller on the fast path could take the same slot; Math.max only masks the
  // resulting underflow. Confirm whether brief over-subscription is acceptable.
  await new Promise((wake) => tokenWaiters.push(wake));
  availableTokens = Math.max(0, availableTokens - 1);
}
|
|
|
function releaseToken() {
  // Return a slot to the bucket (never exceeding the current effective
  // ceiling) and pass it straight to a parked acquireToken() caller, if any.
  availableTokens = Math.min(effectiveConcurrency, availableTokens + 1);
  if (availableTokens > 0) {
    const waiter = tokenWaiters.shift();
    if (waiter) waiter();
  }
}
|
|
|
|
|
async function fetchConfigForModel(modelId) {
  // Fetch the model's config from a small set of well-known repo locations,
  // with exponential backoff on 429s and network errors. Returns one of:
  //   { status: 'ok', model_type, architectures }
  //   { status: 'auth', code }          - 401/403, token required
  //   { status: 'error', code?, message }
  //   { status: 'no-config' }           - every candidate URL 404'd
  const urls = [
    `https://huggingface.co/${encodeURIComponent(modelId)}/resolve/main/config.json`,
    `https://huggingface.co/${encodeURIComponent(modelId)}/resolve/main/config/config.json`,
    `https://huggingface.co/${encodeURIComponent(modelId)}/resolve/main/adapter_config.json`
  ];
  for (const url of urls) {
    for (let attempt = 0; attempt <= RETRIES; attempt++) {
      const controller = new AbortController();
      inFlight.add(controller);
      // BUG FIX: the timeoutMs option was accepted but never applied, so a
      // stalled request could occupy a worker forever. Abort after timeoutMs.
      const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : null;
      try {
        const resp = await fetch(url, {
          signal: controller.signal,
          headers: hfToken ? { Authorization: `Bearer ${hfToken}` } : {},
          cache: 'force-cache'
        });
        if (resp.status === 200) {
          const json = await resp.json();
          counters.configFetch200++;
          return { status: 'ok', model_type: json.model_type || null, architectures: json.architectures || null };
        }
        if (resp.status === 401 || resp.status === 403) return { status: 'auth', code: resp.status };
        if (resp.status === 404) break; // not here; try the next candidate URL
        if (resp.status === 429) {
          counters.configFetch429++;
          record429(); // feed the adaptive concurrency limiter
          const backoff = BACKOFF_BASE_MS * Math.pow(2, attempt);
          await new Promise(r => setTimeout(r, backoff));
          continue;
        }
        counters.configFetchError++;
        return { status: 'error', code: resp.status, message: `fetch failed ${resp.status}` };
      } catch (err) {
        // Network failure, abort, or a body-parse error: retry with backoff
        // until the budget is exhausted.
        if (attempt === RETRIES) {
          counters.configFetchError++;
          return { status: 'error', message: String(err) };
        }
        const backoff = BACKOFF_BASE_MS * Math.pow(2, attempt);
        await new Promise(r => setTimeout(r, backoff));
      } finally {
        if (timer) clearTimeout(timer);
        inFlight.delete(controller); // Set.delete never throws; no wrapper needed
      }
    }
  }
  return { status: 'no-config' };
}
|
|
|
function classifyModel(rawModel, fetchResult) {
  // Combine a raw listing entry with its config-fetch result into a single
  // classification record.
  //   classification: 'gen' | 'encoder' | 'auth-protected' | 'unknown'
  //   confidence:     'high' | 'medium' | 'low'
  const id = rawModel.modelId || rawModel.id || rawModel.model; // (dropped the original's duplicated modelId fallback)
  const hasTokenizer = rawModel.hasTokenizer || false;
  const hasOnnxModel = rawModel.hasOnnxModel || false;
  const isTransformersJsReady = rawModel.isTransformersJsReady || false;
  const entry = { id, model_type: null, architectures: null, classification: 'unknown', confidence: 'low', fetchStatus: 'error', hasTokenizer, hasOnnxModel, isTransformersJsReady };
  if (!fetchResult) return entry;

  if (fetchResult.status === 'auth') {
    entry.classification = 'auth-protected';
    entry.confidence = 'high';
    entry.fetchStatus = String(fetchResult.code || 401);
    return entry;
  }

  if (fetchResult.status === 'ok') {
    entry.model_type = fetchResult.model_type || null;
    entry.architectures = Array.isArray(fetchResult.architectures) ? fetchResult.architectures : null;
    entry.fetchStatus = 'ok';
    // Known encoder-only families vs. known generation-capable families.
    const deny = ['bert','roberta','distilbert','electra','albert','deberta','mobilebert','convbert','sentence-transformers'];
    const allow = ['gpt2','gptj','gpt_neox','llama','qwen','qwen2','mistral','phi','phi3','t5','bart','pegasus','gemma','gemma2','gemma3','falcon','bloom','lfm2'];
    if (entry.model_type && deny.includes(entry.model_type)) { entry.classification = 'encoder'; entry.confidence = 'high'; return entry; }
    if (entry.model_type && allow.includes(entry.model_type)) { entry.classification = 'gen'; entry.confidence = 'high'; return entry; }

    // Second pass with '-'/'_' stripped, e.g. 'qwen-2' -> 'qwen2'.
    const normalizedModelType = entry.model_type && entry.model_type.replace(/[-_]/g, '');
    if (normalizedModelType) {
      const normalizedAllow = allow.map(t => t.replace(/[-_]/g, ''));
      const normalizedDeny = deny.map(t => t.replace(/[-_]/g, ''));
      if (normalizedDeny.includes(normalizedModelType)) { entry.classification = 'encoder'; entry.confidence = 'high'; return entry; }
      if (normalizedAllow.includes(normalizedModelType)) { entry.classification = 'gen'; entry.confidence = 'high'; return entry; }
    }

    // Last resort: scan the architectures array.
    // NOTE(review): lowercased architecture names such as 'llamaforcausallm'
    // never equal short family names like 'llama', so this loop almost never
    // matches — a substring check was probably intended. Left as-is to
    // preserve behavior; confirm before changing.
    const arch = entry.architectures;
    if (arch && Array.isArray(arch)) {
      for (let i = 0; i < arch.length; i++) {
        const a = String(arch[i]).toLowerCase();
        if (allow.includes(a)) { entry.classification = 'gen'; entry.confidence = 'high'; return entry; }
        if (deny.includes(a)) { entry.classification = 'encoder'; entry.confidence = 'high'; return entry; }
      }
    }
    entry.classification = 'unknown';
    entry.confidence = 'low';
    return entry;
  }

  if (fetchResult.status === 'no-config') {
    const pipeline = rawModel.pipeline_tag || '';
    if (pipeline && pipeline.startsWith('text-generation')) {
      // BUG FIX: the original wrote a braceless `else ...; entry.confidence =
      // 'low';`, so the 'low' assignment ran unconditionally and clobbered the
      // 'medium' confidence assigned on this branch.
      entry.classification = 'gen';
      entry.confidence = 'medium';
    } else {
      entry.classification = 'unknown';
      entry.confidence = 'low';
    }
    entry.fetchStatus = '404';
    return entry;
  }

  if (fetchResult.status === 'error') {
    entry.classification = 'unknown';
    entry.confidence = 'low';
    entry.fetchStatus = 'error';
    entry.fetchError = { message: fetchResult.message, code: fetchResult.code };
    return entry;
  }
  return entry;
}
|
|
|
|
|
let listing = []; |
|
try { |
|
|
|
let offset = 0; |
|
while (listing.length < MAX_TOTAL_TO_FETCH) { |
|
const url = `https://huggingface.co/api/models?full=true&limit=${PAGE_SIZE}&offset=${offset}`; |
|
let ok = false; |
|
for (let attempt = 0; attempt <= RETRIES && !ok; attempt++) { |
|
try { |
|
const resp = await fetch( |
|
url, |
|
{ |
|
headers: hfToken ? { Authorization: `Bearer ${hfToken}` } : {}, |
|
cache: 'force-cache' |
|
} |
|
); |
|
if (resp.status === 429) { |
|
const backoff = BACKOFF_BASE_MS * Math.pow(2, attempt); |
|
await new Promise(r => setTimeout(r, backoff)); |
|
continue; |
|
} |
|
if (!resp.ok) throw Object.assign(new Error(`listing-fetch-failed:${resp.status}`), { code: 'listing_fetch_failed', status: resp.status }); |
|
const page = await resp.json(); |
|
if (!Array.isArray(page) || page.length === 0) { ok = true; break; } |
|
listing.push(...page); |
|
offset += PAGE_SIZE; |
|
ok = true; |
|
} catch (err) { |
|
if (attempt === RETRIES) throw err; |
|
await new Promise(r => setTimeout(r, BACKOFF_BASE_MS * Math.pow(2, attempt))); |
|
} |
|
} |
|
if (!ok) break; |
|
} |
|
|
|
|
|
yield { status: 'listing_done', totalFound: listing.length }; |
|
|
|
|
|
const denyPipeline = new Set(['feature-extraction', 'fill-mask', 'sentence-similarity', 'masked-lm']); |
|
const survivors = []; |
|
for (const m of listing) { |
|
if (survivors.length >= maxCandidates) break; |
|
const pipeline = m.pipeline_tag; |
|
if (pipeline && denyPipeline.has(pipeline)) continue; |
|
|
|
|
|
const modelId = (m.modelId || m.id || m.model || '').toString(); |
|
if (modelId && modelId.includes('sentence-transformers')) continue; |
|
|
|
|
|
const siblings = m.siblings || []; |
|
const hasTokenizer = siblings.some((s) => { |
|
if (!s) return false; |
|
let name = null; |
|
if (typeof s === 'string') name = s; |
|
else if (typeof s === 'object') name = s.rfilename || s.name || s.path || s.filename || s.repo_file || s.file || null; |
|
if (!name) return false; |
|
return /tokenizer|vocab|merges|sentencepiece/i.test(String(name)); |
|
}); |
|
|
|
|
|
const hasOnnxModel = siblings.some((s) => { |
|
if (!s) return false; |
|
let name = null; |
|
if (typeof s === 'string') name = s; |
|
else if (typeof s === 'object') name = s.rfilename || s.name || s.path || s.filename || s.repo_file || s.file || null; |
|
if (!name) return false; |
|
|
|
return /onnx\/.*\.onnx|onnx\\.*\.onnx|.*model.*\.onnx|.*decoder.*\.onnx/i.test(String(name)); |
|
}); |
|
|
|
|
|
|
|
if (!hasTokenizer || !hasOnnxModel) { |
|
|
|
if (!pipeline || !pipeline.toLowerCase().includes('text-generation')) continue; |
|
if (!hasTokenizer) continue; |
|
} |
|
|
|
|
|
const isTransformersJsReady = (m.library_name === 'transformers.js') || |
|
(Array.isArray(m.tags) && m.tags.includes('transformers.js')) || |
|
(Array.isArray(m.tags) && m.tags.includes('onnx')); |
|
|
|
|
|
m.hasTokenizer = hasTokenizer; |
|
m.hasOnnxModel = hasOnnxModel; |
|
m.isTransformersJsReady = isTransformersJsReady; |
|
|
|
survivors.push(m); |
|
} |
|
|
|
yield { status: 'prefiltered', survivors: survivors.length }; |
|
|
|
|
|
|
|
const results = []; |
|
const errors = []; |
|
let idx = 0; |
|
let processed = 0; |
|
const events = []; |
|
let resolveNext = null; |
|
function emit(ev) { |
|
events.push(ev); |
|
if (resolveNext) { |
|
resolveNext(); |
|
resolveNext = null; |
|
} |
|
} |
|
async function nextEvent() { |
|
while (events.length === 0) { |
|
await new Promise(r => { resolveNext = r; }); |
|
} |
|
return events.shift(); |
|
} |
|
|
|
|
|
const restoreInterval = setInterval(maybeRestoreConcurrency, 5000); |
|
|
|
const workerCount = Math.min(initialConcurrency, survivors.length || 1); |
|
const pool = new Array(workerCount).fill(0).map(async () => { |
|
while (true) { |
|
const i = idx++; |
|
if (i >= survivors.length) break; |
|
const model = survivors[i]; |
|
const modelId = model.modelId || model.id || model.model || model.modelId; |
|
try { |
|
|
|
await acquireToken(); |
|
emit({ modelId, status: 'config_fetching' }); |
|
const fetchResult = await fetchConfigForModel(modelId); |
|
const entry = classifyModel(model, fetchResult); |
|
results.push(entry); |
|
emit({ modelId, status: 'classified', data: entry }); |
|
} catch (err) { |
|
errors.push({ modelId, message: String(err) }); |
|
emit({ modelId, status: 'error', data: { message: String(err) } }); |
|
} finally { |
|
processed++; |
|
|
|
try { releaseToken(); } catch (e) {} |
|
} |
|
} |
|
}); |
|
|
|
|
|
while (processed < survivors.length) { |
|
const ev = await nextEvent(); |
|
yield ev; |
|
} |
|
|
|
|
|
while (events.length > 0) { |
|
yield events.shift(); |
|
} |
|
|
|
await Promise.all(pool); |
|
|
|
|
|
|
|
|
|
const authRequired = results.filter(r => r.classification === 'auth-protected').slice(0, 50); |
|
const genCapable = results.filter(r => r.classification === 'gen' && r.hasTokenizer && r.hasOnnxModel); |
|
|
|
|
|
genCapable.sort((a, b) => { |
|
if (a.isTransformersJsReady && !b.isTransformersJsReady) return -1; |
|
if (!a.isTransformersJsReady && b.isTransformersJsReady) return 1; |
|
return 0; |
|
}); |
|
|
|
const nonAuth = genCapable.slice(0, 50); |
|
const selected = nonAuth.concat(authRequired); |
|
const models = selected.map(r => ({ id: r.id, model_type: r.model_type, architectures: r.architectures, classification: r.classification, confidence: r.confidence, fetchStatus: r.fetchStatus, hasTokenizer: r.hasTokenizer, hasOnnxModel: r.hasOnnxModel, isTransformersJsReady: r.isTransformersJsReady })); |
|
const meta = { fetched: listing.length, filtered: survivors.length, errors, selected: { nonAuth: nonAuth.length, authRequired: authRequired.length, total: models.length } }; |
|
if (params && params.debug) meta.counters = Object.assign({}, counters); |
|
yield { status: 'done', models, meta }; |
|
} finally { |
|
|
|
for (const c of Array.from(inFlight)) try { c.abort(); } catch (e) {} |
|
|
|
try { clearInterval(restoreInterval); } catch (e) {} |
|
} |
|
} |
|
|