feat: filter supported quantizations in HuggingFace model info
Browse files
- src/lib/huggingface.ts +2 -1
- src/types.ts +15 -4
src/lib/huggingface.ts
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import { supportedPipelines } from '../components/PipelineSelector'
|
2 |
-
import { ModelInfoResponse, QuantizationType } from '../types'
|
3 |
|
4 |
const getModelInfo = async (
|
5 |
modelName: string,
|
@@ -69,6 +69,7 @@ const getModelInfo = async (
|
|
69 |
.filter((file) => file.endsWith('.onnx') && file.includes('_'))
|
70 |
.map((file) => file.split('/')[1].split('_')[1].split('.')[0])
|
71 |
.filter((q) => q !== 'quantized')
|
|
|
72 |
: []
|
73 |
const uniqueSupportedQuantizations = Array.from(
|
74 |
new Set(supportedQuantizations)
|
|
|
1 |
import { supportedPipelines } from '../components/PipelineSelector'
|
2 |
+
import { allQuantizationTypes, ModelInfoResponse, QuantizationType } from '../types'
|
3 |
|
4 |
const getModelInfo = async (
|
5 |
modelName: string,
|
|
|
69 |
.filter((file) => file.endsWith('.onnx') && file.includes('_'))
|
70 |
.map((file) => file.split('/')[1].split('_')[1].split('.')[0])
|
71 |
.filter((q) => q !== 'quantized')
|
72 |
+
.filter((q) => allQuantizationTypes.includes(q as QuantizationType))
|
73 |
: []
|
74 |
const uniqueSupportedQuantizations = Array.from(
|
75 |
new Set(supportedQuantizations)
|
src/types.ts
CHANGED
@@ -60,12 +60,23 @@ export interface TextGenerationWorkerInput {
|
|
60 |
do_sample?: boolean
|
61 |
}
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
|
|
|
|
|
|
|
|
|
|
|
68 |
export type QuantizationType = q8 | q4 | fp16 | fp32
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
export interface ModelInfo {
|
71 |
id: string
|
|
|
60 |
do_sample?: boolean
|
61 |
}
|
62 |
|
63 |
+
const q8Types = ['q8', 'int8', 'bnb8', 'uint8'] as const
|
64 |
+
const q4Types = ['q4', 'bnb4', 'q4f16'] as const
|
65 |
+
const fp16Types = ['fp16'] as const
|
66 |
+
const fp32Types = ['fp32'] as const
|
67 |
|
68 |
+
type q8 = (typeof q8Types)[number]
|
69 |
+
type q4 = (typeof q4Types)[number]
|
70 |
+
type fp16 = (typeof fp16Types)[number]
|
71 |
+
type fp32 = (typeof fp32Types)[number]
|
72 |
+
|
73 |
export type QuantizationType = q8 | q4 | fp16 | fp32
|
74 |
+
export const allQuantizationTypes = [
|
75 |
+
...q8Types,
|
76 |
+
...q4Types,
|
77 |
+
...fp16Types,
|
78 |
+
...fp32Types
|
79 |
+
] as const
|
80 |
|
81 |
export interface ModelInfo {
|
82 |
id: string
|