disable two models that cannot run or run too slowly on hf spaces with zerogpu
app.py CHANGED

@@ -55,10 +55,10 @@ MODELS = {
     #     "description": "4-bit AWQ quantized dense causal language model with 32.8B total parameters (31.2B non-embedding), 64 layers, 64 query heads & 8 KV heads, native 32,768-token context (extendable to 131,072 via YaRN). Features seamless switching between thinking mode (for complex reasoning, math, coding) and non-thinking mode (for efficient dialogue), strong multilingual support (100+ languages), and leading open-source agent capabilities."
     # },
 
-    "gpt-oss-20b-BF16": {
-        "repo_id": "unsloth/gpt-oss-20b-BF16",
-        "description": "A 20B-parameter open-source GPT-style language model quantized to INT4 using AutoRound, with FP8 key-value cache for efficient inference. Optimized for performance and memory efficiency on Intel hardware while maintaining strong language generation capabilities."
-    },
+    # "gpt-oss-20b-BF16": {
+    #     "repo_id": "unsloth/gpt-oss-20b-BF16",
+    #     "description": "A 20B-parameter open-source GPT-style language model quantized to INT4 using AutoRound, with FP8 key-value cache for efficient inference. Optimized for performance and memory efficiency on Intel hardware while maintaining strong language generation capabilities."
+    # },
 
     "Apriel-1.5-15b-Thinker": {
         "repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker",
@@ -79,10 +79,10 @@ MODELS = {
     # "Apriel-1.5-15b-Thinker": { ... },
 
     # 5B
-    "Apriel-5B-Instruct": {
-        "repo_id": "ServiceNow-AI/Apriel-5B-Instruct",
-        "description": "A 5B-parameter instruction-tuned model from ServiceNow’s Apriel series, optimized for enterprise tasks and general-purpose instruction following."
-    },
+    # "Apriel-5B-Instruct": {
+    #     "repo_id": "ServiceNow-AI/Apriel-5B-Instruct",
+    #     "description": "A 5B-parameter instruction-tuned model from ServiceNow’s Apriel series, optimized for enterprise tasks and general-purpose instruction following."
+    # },
 
     # 4.3B
     "Phi-4-mini-Reasoning": {
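For context, below is a minimal, hypothetical sketch of how a MODELS registry like this is typically consumed in a ZeroGPU Space. It is not this repository's actual app.py: the generate() function, its parameters, the @spaces.GPU duration, and the trimmed-down dict entries are illustrative assumptions; only the repo_id values and entry names come from the diff above. The point it shows is that commenting an entry out of MODELS removes it from whatever the app exposes, so disabling a model needs no other code change.

# Hypothetical sketch of a ZeroGPU Space consuming a MODELS registry; not the
# Space's real app.py. Names, parameters, and loading strategy are assumptions.
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {
    # Disabled entries stay in the file as comments, so they never reach the UI:
    # "gpt-oss-20b-BF16": {"repo_id": "unsloth/gpt-oss-20b-BF16"},
    # "Apriel-5B-Instruct": {"repo_id": "ServiceNow-AI/Apriel-5B-Instruct"},
    "Apriel-1.5-15b-Thinker": {"repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker"},
}

@spaces.GPU(duration=120)  # ZeroGPU attaches a GPU only while this call runs
def generate(model_name: str, prompt: str, max_new_tokens: int = 256) -> str:
    repo_id = MODELS[model_name]["repo_id"]
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id, torch_dtype=torch.bfloat16, device_map="auto"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

If weights are reloaded inside the GPU-scoped call, as in this sketch, per-request latency grows with checkpoint size, which would fit the commit message about models running too slowly under ZeroGPU; the diff itself does not state the exact cause.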